From 05ed277f5e3abd72d21dd836808c762b5c852ebe Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Thu, 16 Jan 2025 22:56:36 -0800 Subject: [PATCH 01/15] feat(graphql/versioning): Add versioning support to graphql; mutations return version set (#12358) Co-authored-by: RyanHolstien --- .../linkedin/datahub/graphql/Constants.java | 1 + .../datahub/graphql/GmsGraphQLEngine.java | 53 +++- .../resolvers/config/AppConfigResolver.java | 1 + .../versioning/LinkAssetVersionResolver.java | 38 ++- .../UnlinkAssetVersionResolver.java | 19 +- .../graphql/resolvers/search/SearchUtils.java | 107 +++++++ .../versioning/VersionsSearchResolver.java | 87 ++++++ .../mappers/SearchFlagsInputMapper.java | 3 + .../common/mappers/UrnToEntityMapper.java | 6 + .../graphql/types/dataset/DatasetType.java | 3 +- .../types/dataset/mappers/DatasetMapper.java | 7 + .../types/mlmodel/mappers/MLModelMapper.java | 7 + .../versioning/VersionPropertiesMapper.java | 53 ++++ .../types/versioning/VersionSetMapper.java | 47 +++ .../types/versioning/VersionSetType.java | 79 +++++ .../src/main/resources/app.graphql | 7 +- .../src/main/resources/entity.graphql | 65 +--- .../src/main/resources/search.graphql | 7 +- .../src/main/resources/versioning.graphql | 148 +++++++++ .../LinkAssetVersionResolverTest.java | 3 +- .../UnlinkAssetVersionResolverTest.java | 2 +- .../VersionsSearchResolverTest.java | 294 ++++++++++++++++++ datahub-web-react/src/Mocks.tsx | 1 + datahub-web-react/src/appConfigContext.tsx | 1 + datahub-web-react/src/graphql/app.graphql | 1 + datahub-web-react/src/graphql/dataset.graphql | 1 + datahub-web-react/src/graphql/mlModel.graphql | 1 + datahub-web-react/src/graphql/preview.graphql | 5 + datahub-web-react/src/graphql/search.graphql | 5 + .../src/graphql/versioning.graphql | 89 ++++++ 30 files changed, 1055 insertions(+), 86 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java create mode 100644 datahub-graphql-core/src/main/resources/versioning.graphql create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java create mode 100644 datahub-web-react/src/graphql/versioning.graphql diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 69306862a46ef7..aec5352dec1a64 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -28,6 +28,7 @@ private Constants() {} public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; + public static final String VERSION_SCHEMA_FILE = "versioning.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index b15db80a8487ae..403e80a71380be 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -124,6 +124,8 @@ import com.linkedin.datahub.graphql.generated.TestResult; import com.linkedin.datahub.graphql.generated.TypeQualifier; import com.linkedin.datahub.graphql.generated.UserUsageCounts; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.resolvers.MeResolver; import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver; import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver; @@ -324,6 +326,7 @@ import com.linkedin.datahub.graphql.resolvers.user.ListUsersResolver; import com.linkedin.datahub.graphql.resolvers.user.RemoveUserResolver; import com.linkedin.datahub.graphql.resolvers.user.UpdateUserStatusResolver; +import com.linkedin.datahub.graphql.resolvers.versioning.VersionsSearchResolver; import com.linkedin.datahub.graphql.resolvers.view.CreateViewResolver; import com.linkedin.datahub.graphql.resolvers.view.DeleteViewResolver; import com.linkedin.datahub.graphql.resolvers.view.ListGlobalViewsResolver; @@ -381,6 +384,7 @@ import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertyType; import com.linkedin.datahub.graphql.types.tag.TagType; import com.linkedin.datahub.graphql.types.test.TestType; +import com.linkedin.datahub.graphql.types.versioning.VersionSetType; import com.linkedin.datahub.graphql.types.view.DataHubViewType; import com.linkedin.entity.client.EntityClient; import com.linkedin.entity.client.SystemEntityClient; @@ -537,6 +541,7 @@ public class GmsGraphQLEngine { private final IncidentType incidentType; private final RestrictedType restrictedType; private final DataProcessInstanceType dataProcessInstanceType; + private final VersionSetType versionSetType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -658,6 +663,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); + this.versionSetType = new VersionSetType(entityClient); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -707,6 +713,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { entityTypeType, formType, incidentType, + versionSetType, restrictedType, businessAttributeType, dataProcessInstanceType)); @@ -809,6 +816,8 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) { configureConnectionResolvers(builder); configureDeprecationResolvers(builder); configureMetadataAttributionResolver(builder); + configureVersionPropertiesResolvers(builder); + configureVersionSetResolvers(builder); } private void configureOrganisationRoleResolvers(RuntimeWiring.Builder builder) { @@ -863,7 +872,8 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)) 
.addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE)) - .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)); + .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE)) + .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { List pluginSchemaFiles = plugin.getSchemaFiles(); @@ -1050,6 +1060,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("form", getResolver(formType)) .dataFetcher("view", getResolver(dataHubViewType)) .dataFetcher("structuredProperty", getResolver(structuredPropertyType)) + .dataFetcher("versionSet", getResolver(versionSetType)) .dataFetcher("listPolicies", new ListPoliciesResolver(this.entityClient)) .dataFetcher("getGrantedPrivileges", new GetGrantedPrivilegesResolver()) .dataFetcher("listUsers", new ListUsersResolver(this.entityClient)) @@ -2295,7 +2306,15 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) { .type( "TimeSeriesAspect", typeWiring -> typeWiring.typeResolver(new TimeSeriesAspectInterfaceTypeResolver())) - .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())); + .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver())) + .type( + "SupportsVersions", + typeWiring -> + typeWiring.typeResolver( + new EntityInterfaceTypeResolver( + loadableTypes.stream() + .map(graphType -> (EntityType) graphType) + .collect(Collectors.toList())))); } /** Configures custom type extensions leveraged within our GraphQL schema. */ @@ -3322,4 +3341,34 @@ private void configureMetadataAttributionResolver(final RuntimeWiring.Builder bu entityTypes, (env) -> ((MetadataAttribution) env.getSource()).getSource()))); } + + private void configureVersionPropertiesResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionProperties", + typeWiring -> + typeWiring.dataFetcher( + "versionSet", + new LoadableTypeResolver<>( + versionSetType, + (env) -> { + final VersionProperties versionProperties = env.getSource(); + return versionProperties != null + ? 
versionProperties.getVersionSet().getUrn() + : null; + }))); + } + + private void configureVersionSetResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "VersionSet", + typeWiring -> + typeWiring + .dataFetcher( + "latestVersion", + new EntityTypeResolver( + entityTypes, (env) -> ((VersionSet) env.getSource()).getLatestVersion())) + .dataFetcher( + "versionsSearch", + new VersionsSearchResolver(this.entityClient, this.viewService))); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index 3647eb55b2583a..8cdc13a14be87c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -189,6 +189,7 @@ public CompletableFuture get(final DataFetchingEnvironment environmen .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled()) .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings()) .setShowManageStructuredProperties(_featureFlags.isShowManageStructuredProperties()) + .setEntityVersioningEnabled(_featureFlags.isEntityVersioning()) .build(); appConfig.setFeatureFlags(featureFlagsConfig); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java index 69e049af1e87b7..f32fd03a384005 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -12,7 +12,9 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; @@ -21,24 +23,22 @@ import io.datahubproject.metadata.context.OperationContext; import java.util.List; import java.util.concurrent.CompletableFuture; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.StringUtils; /** * Currently only supports linking the latest version, but may be modified later to support inserts */ -public class LinkAssetVersionResolver implements DataFetcher> { +@Slf4j +@RequiredArgsConstructor +public class LinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; - public LinkAssetVersionResolver( - EntityVersioningService entityVersioningService, FeatureFlags featureFlags) { - this.entityVersioningService = entityVersioningService; - this.featureFlags = featureFlags; - } - @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment 
environment) throws Exception { final QueryContext context = environment.getContext(); final LinkVersionInput input = bindArgument(environment.getArgument("input"), LinkVersionInput.class); @@ -75,12 +75,22 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws entityVersioningService.linkLatestVersion( opContext, versionSetUrn, entityUrn, versionPropertiesInput); - return linkResults.stream() - .filter( - ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString())) - .map(ingestResult -> ingestResult.getUrn().toString()) - .findAny() - .orElse(StringUtils.EMPTY); + String successVersionSetUrn = + linkResults.stream() + .filter( + ingestResult -> + input.getLinkedEntity().equals(ingestResult.getUrn().toString())) + .map(ingestResult -> ingestResult.getUrn().toString()) + .findAny() + .orElse(StringUtils.EMPTY); + + if (StringUtils.isEmpty(successVersionSetUrn)) { + return null; + } + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java index 3d5027a0d668ac..33ab83a59c6771 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java @@ -12,14 +12,18 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.versioning.EntityVersioningService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import io.datahubproject.metadata.context.OperationContext; +import java.util.List; import java.util.concurrent.CompletableFuture; -public class UnlinkAssetVersionResolver implements DataFetcher> { +public class UnlinkAssetVersionResolver implements DataFetcher> { private final EntityVersioningService entityVersioningService; private final FeatureFlags featureFlags; @@ -31,7 +35,7 @@ public UnlinkAssetVersionResolver( } @Override - public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { + public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { if (!featureFlags.isEntityVersioning()) { throw new IllegalAccessError( "Entity Versioning is not configured, please enable before attempting to use this feature."); @@ -58,8 +62,15 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } return GraphQLConcurrencyUtils.supplyAsync( () -> { - entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); - return true; + List results = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + if (results.isEmpty() || results.stream().allMatch(RollbackResult::isNoOp)) { + return null; + } + VersionSet versionSet = 
new VersionSet(); + versionSet.setUrn(versionSetUrn.toString()); + versionSet.setType(EntityType.VERSION_SET); + return versionSet; }, this.getClass().getSimpleName(), "get"); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index a01b3aaec9c982..f105a72a1273ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -18,13 +18,18 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.SearchResults; import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -33,24 +38,32 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.service.ViewService; import com.linkedin.view.DataHubViewInfo; import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.codehaus.plexus.util.CollectionUtils; @Slf4j public class SearchUtils { private SearchUtils() {} + private static final int DEFAULT_SEARCH_COUNT = 10; + private static final int DEFAULT_SCROLL_COUNT = 10; + private static final String DEFAULT_SCROLL_KEEP_ALIVE = "5m"; + /** Entities that are searched by default in Search Across Entities */ public static final List SEARCHABLE_ENTITY_TYPES = ImmutableList.of( @@ -348,4 +361,98 @@ public static List getSortCriteria(@Nullable final SearchSortInpu return sortCriteria; } + + public static CompletableFuture searchAcrossEntities( + QueryContext inputContext, + final EntityClient _entityClient, + final ViewService _viewService, + List inputEntityTypes, + String inputQuery, + Filter baseFilter, + String viewUrn, + List sortCriteria, + com.linkedin.datahub.graphql.generated.SearchFlags inputSearchFlags, + Integer inputCount, + Integer inputStart, + String className) { + + final List entityTypes = + (inputEntityTypes == null || inputEntityTypes.isEmpty()) + ? 
SEARCHABLE_ENTITY_TYPES + : inputEntityTypes; + final List entityNames = + entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList()); + + // escape forward slash since it is a reserved character in Elasticsearch, default to * if + // blank/empty + final String query = + StringUtils.isNotBlank(inputQuery) ? ResolverUtils.escapeForwardSlash(inputQuery) : "*"; + + final Optional searchFlags = + Optional.ofNullable(inputSearchFlags) + .map((flags) -> SearchFlagsInputMapper.map(inputContext, flags)); + final OperationContext context = + inputContext.getOperationContext().withSearchFlags(searchFlags::orElse); + + final int count = Optional.ofNullable(inputCount).orElse(DEFAULT_SEARCH_COUNT); + final int start = Optional.ofNullable(inputStart).orElse(0); + + return GraphQLConcurrencyUtils.supplyAsync( + () -> { + final OperationContext baseContext = inputContext.getOperationContext(); + final Optional maybeResolvedView = + Optional.ofNullable(viewUrn) + .map((urn) -> resolveView(baseContext, _viewService, UrnUtils.getUrn(urn))); + + final List finalEntityNames = + maybeResolvedView + .map( + (view) -> + intersectEntityTypes(entityNames, view.getDefinition().getEntityTypes())) + .orElse(entityNames); + + final Filter finalFilters = + maybeResolvedView + .map((view) -> combineFilters(baseFilter, view.getDefinition().getFilter())) + .orElse(baseFilter); + + log.debug( + "Executing search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + + try { + final SearchResult searchResult = + _entityClient.searchAcrossEntities( + context, + finalEntityNames, + query, + finalFilters, + start, + count, + sortCriteria, + null); + return UrnSearchResultsMapper.map(inputContext, searchResult); + } catch (Exception e) { + log.warn( + "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}", + finalEntityNames, + query, + finalFilters, + start, + count); + throw new RuntimeException( + "Failed to execute search: " + + String.format( + "entity types %s, query %s, filters: %s, start: %s, count: %s", + finalEntityNames, query, finalFilters, start, count), + e); + } + }, + className, + "searchAcrossEntities"); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java new file mode 100644 index 00000000000000..915e1cf00ebc6b --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java @@ -0,0 +1,87 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.resolvers.ResolverUtils; +import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import 
com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Stream; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** Resolver that executes a searchAcrossEntities only on a version set's versioned entities */ +@Slf4j +@RequiredArgsConstructor +public class VersionsSearchResolver implements DataFetcher> { + + private static final String VERSION_SET_FIELD_NAME = "versionSet"; + + private final EntityClient _entityClient; + private final ViewService _viewService; + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + final Entity entity = environment.getSource(); + final QueryContext context = environment.getContext(); + final SearchAcrossEntitiesInput input = + bindArgument(environment.getArgument("input"), SearchAcrossEntitiesInput.class); + + final Criterion versionSetFilter = + CriterionUtils.buildCriterion(VERSION_SET_FIELD_NAME, Condition.EQUAL, entity.getUrn()); + final Filter baseFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd(new CriterionArray(versionSetFilter)))); + final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + + final List initialSortCriteria = + SearchUtils.getSortCriteria(input.getSortInput()); + final List sortCriteria = + Stream.concat( + initialSortCriteria.stream(), + Stream.of( + new SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(SortOrder.DESCENDING))) + .toList(); + + SearchFlags searchFlags = Optional.ofNullable(input.getSearchFlags()).orElse(new SearchFlags()); + searchFlags.setFilterNonLatestVersions(false); + + return SearchUtils.searchAcrossEntities( + context, + _entityClient, + _viewService, + input.getTypes(), + input.getQuery(), + SearchUtils.combineFilters(inputFilter, baseFilter), + input.getViewUrn(), + sortCriteria, + searchFlags, + input.getCount(), + input.getStart(), + this.getClass().getSimpleName()); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index 9f5025ccf303a2..0b3a445175c4c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -69,6 +69,9 @@ public com.linkedin.metadata.query.SearchFlags apply( result.setCustomHighlightingFields( new StringArray(searchFlags.getCustomHighlightingFields())); } + if (searchFlags.getFilterNonLatestVersions() != null) { + result.setFilterNonLatestVersions(searchFlags.getFilterNonLatestVersions()); + } return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index eae33e6da2e56d..b815c1b1c1dd9f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -41,6 +41,7 @@ import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity; import com.linkedin.datahub.graphql.generated.Tag; import com.linkedin.datahub.graphql.generated.Test; +import com.linkedin.datahub.graphql.generated.VersionSet; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -231,6 +232,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((DataProcessInstance) partialEntity).setUrn(input.toString()); ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); } + if (input.getEntityType().equals(VERSION_SET_ENTITY_NAME)) { + partialEntity = new VersionSet(); + ((VersionSet) partialEntity).setUrn(input.toString()); + ((VersionSet) partialEntity).setType(EntityType.VERSION_SET); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 6a3f9cb9b21f38..74ef4cf125cd24 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -89,7 +89,8 @@ public class DatasetType ACCESS_ASPECT_NAME, STRUCTURED_PROPERTIES_ASPECT_NAME, FORMS_ASPECT_NAME, - SUB_TYPES_ASPECT_NAME); + SUB_TYPES_ASPECT_NAME, + VERSION_PROPERTIES_ASPECT_NAME); private static final Set FACET_FIELDS = ImmutableSet.of("origin", "platform"); private static final String ENTITY_NAME = "dataset"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java index e411014c23c89b..aa7033b180e80e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java @@ -17,6 +17,7 @@ import com.linkedin.common.Status; import com.linkedin.common.SubTypes; import com.linkedin.common.TimeStamp; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.datahub.graphql.QueryContext; @@ -48,6 +49,7 @@ import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.dataset.DatasetDeprecation; import com.linkedin.dataset.DatasetProperties; import com.linkedin.dataset.EditableDatasetProperties; @@ -183,6 +185,11 @@ public Dataset apply( SUB_TYPES_ASPECT_NAME, (dashboard, dataMap) -> dashboard.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) 
-> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), Dataset.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java index 7102fd4aed9743..11e6b5180f8c1c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java @@ -13,6 +13,7 @@ import com.linkedin.common.InstitutionalMemory; import com.linkedin.common.Ownership; import com.linkedin.common.Status; +import com.linkedin.common.VersionProperties; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; @@ -38,6 +39,7 @@ import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper; import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper; +import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper; import com.linkedin.domain.Domains; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; @@ -180,6 +182,11 @@ public MLModel apply( FORMS_ASPECT_NAME, ((entity, dataMap) -> entity.setForms(FormsMapper.map(new Forms(dataMap), entityUrn.toString())))); + mappingHelper.mapToResult( + VERSION_PROPERTIES_ASPECT_NAME, + (entity, dataMap) -> + entity.setVersionProperties( + VersionPropertiesMapper.map(context, new VersionProperties(dataMap)))); if (context != null && !canView(context.getOperationContext(), entityUrn)) { return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), MLModel.class); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java new file mode 100644 index 00000000000000..f89ebdc9f2b043 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java @@ -0,0 +1,53 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionProperties; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.mappers.MapperUtils; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.VersionTagMapper; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionPropertiesMapper + implements ModelMapper { + public static final VersionPropertiesMapper INSTANCE = new VersionPropertiesMapper(); + + public static VersionProperties map( + @Nullable QueryContext context, + @Nonnull final com.linkedin.common.VersionProperties versionProperties) { + return INSTANCE.apply(context, versionProperties); + } + + @Override + public VersionProperties apply( + @Nullable QueryContext context, @Nonnull 
com.linkedin.common.VersionProperties input) { + final VersionProperties result = new VersionProperties(); + + result.setVersionSet( + VersionSet.builder() + .setUrn(input.getVersionSet().toString()) + .setType(EntityType.VERSION_SET) + .build()); + + result.setVersion(VersionTagMapper.map(context, input.getVersion())); + result.setAliases( + input.getAliases().stream() + .map(alias -> VersionTagMapper.map(context, alias)) + .collect(Collectors.toList())); + result.setComment(input.getComment()); + result.setIsLatest(Boolean.TRUE.equals(input.isIsLatest())); + + if (input.getMetadataCreatedTimestamp() != null) { + result.setCreated(MapperUtils.createResolvedAuditStamp(input.getMetadataCreatedTimestamp())); + } + if (input.getSourceCreatedTimestamp() != null) { + result.setCreatedInSource( + MapperUtils.createResolvedAuditStamp(input.getSourceCreatedTimestamp())); + } + + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java new file mode 100644 index 00000000000000..3a07115ece5f6e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java @@ -0,0 +1,47 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME; + +import com.linkedin.data.DataMap; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionSetMapper implements ModelMapper { + + public static final VersionSetMapper INSTANCE = new VersionSetMapper(); + + public static VersionSet map( + @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { + return INSTANCE.apply(context, entityResponse); + } + + @Override + public VersionSet apply(@Nullable QueryContext context, @Nonnull EntityResponse entityResponse) { + final VersionSet result = new VersionSet(); + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.VERSION_SET); + + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult( + VERSION_SET_PROPERTIES_ASPECT_NAME, + (versionSet, dataMap) -> mapVersionSetProperties(context, versionSet, dataMap)); + + return result; + } + + private void mapVersionSetProperties( + @Nullable QueryContext context, @Nonnull VersionSet versionSet, @Nonnull DataMap dataMap) { + com.linkedin.versionset.VersionSetProperties versionProperties = + new com.linkedin.versionset.VersionSetProperties(dataMap); + versionSet.setLatestVersion(UrnToEntityMapper.map(context, versionProperties.getLatest())); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java new file mode 100644 index 
00000000000000..ed2beff4530949 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class VersionSetType + implements com.linkedin.datahub.graphql.types.EntityType { + + public static final Set ASPECTS_TO_FETCH = + ImmutableSet.of(VERSION_SET_PROPERTIES_ASPECT_NAME); + private final EntityClient _entityClient; + + @Override + public EntityType type() { + return EntityType.VERSION_SET; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return VersionSet.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List versionSetUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + final Map entities = + _entityClient.batchGetV2( + context.getOperationContext(), + VERSION_SET_ENTITY_NAME, + new HashSet<>(versionSetUrns), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(); + for (Urn urn : versionSetUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? null + : DataFetcherResult.newResult() + .data(VersionSetMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load VersionSets", e); + } + } +} diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index 28688903687235..ca7f89415f6b87 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -531,6 +531,11 @@ type FeatureFlagsConfig { If turned on, show the manage structured properties tab in the govern dropdown """ showManageStructuredProperties: Boolean! + + """ + If turned on, exposes the versioning feature by allowing users to link entities in the UI. + """ + entityVersioningEnabled: Boolean! } """ @@ -573,4 +578,4 @@ type DocPropagationSettings { The default doc propagation setting for the platform. 
""" docColumnPropagation: Boolean -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b47be7ae32b2c4..51909ae72c56b0 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,16 +956,6 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean - - """ - Link the latest versioned entity to a Version Set - """ - linkAssetVersion(input: LinkVersionInput!): String - - """ - Unlink a versioned entity from a Version Set - """ - unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -1231,6 +1221,11 @@ enum EntityType { A Business Attribute """ BUSINESS_ATTRIBUTE + + """ + A set of versioned entities, representing a single source / logical entity over time + """ + VERSION_SET } """ @@ -12921,56 +12916,6 @@ input ListBusinessAttributesInput { query: String } -""" -Input for linking a versioned entity to a Version Set -""" -input LinkVersionInput { - """ - The target version set - """ - versionSet: String! - - """ - The target versioned entity to link - """ - linkedEntity: String! - - """ - Version Tag label for the version, should be unique within a Version Set - """ - version: String! - - """ - Optional timestamp from the source system - """ - sourceTimestamp: Long - - """ - Optional creator from the source system, will be converted to an Urn - """ - sourceCreator: String - - """ - Optional comment about the version - """ - comment: String -} - -""" -Input for unlinking a versioned entity from a Version Set -""" -input UnlinkVersionInput { - """ - The target version set - """ - versionSet: String - - """ - The target versioned entity to unlink - """ - unlinkedEntity: String -} - """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 82bfb9ee26fc42..d8f17faa3d11c2 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -172,6 +172,11 @@ input SearchFlags { Whether or not to fetch and request for structured property facets when doing a search """ includeStructuredPropertyFacets: Boolean + + """ + Determines whether to filter out any non-latest entity version if entity is part of a Version Set, default true + """ + filterNonLatestVersions: Boolean } """ @@ -1497,4 +1502,4 @@ input GroupingCriterion { """ groupingEntityType: EntityType! -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/versioning.graphql b/datahub-graphql-core/src/main/resources/versioning.graphql new file mode 100644 index 00000000000000..4a63463509c84d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/versioning.graphql @@ -0,0 +1,148 @@ +type VersionSet implements Entity { + """ + The primary key of the VersionSet + """ + urn: String! + + """ + The standard Entity Type + """ + type: EntityType! 
+ + """ + Granular API for querying edges extending from this entity + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult + + """ + The latest versioned entity linked to in this version set + """ + latestVersion: Entity + + """ + Executes a search on all versioned entities linked to this version set + By default sorts by sortId in descending order + """ + versionsSearch(input: SearchAcrossEntitiesInput!): SearchResults +} + +type VersionProperties { + """ + The linked Version Set entity that ties multiple versioned assets together + """ + versionSet: VersionSet! + + """ + Label for this versioned asset, should be unique within a version set (not enforced) + """ + version: VersionTag! + + """ + Additional version identifiers for this versioned asset. + """ + aliases: [VersionTag!]! + + """ + Comment documenting what this version was created for, changes, or represents + """ + comment: String + + """ + Whether this version is currently the latest in its verison set + """ + isLatest: Boolean! + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + created: ResolvedAuditStamp + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + createdInSource: ResolvedAuditStamp +} + +interface SupportsVersions { + """ + Indicates that this entity is versioned and provides information about the version. + """ + versionProperties: VersionProperties +} + +extend type Dataset implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type MLModel implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type Query { + """ + Fetch a Version Set by its URN + """ + versionSet(urn: String!): VersionSet +} + +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! + + """ + Version Tag label for the version, should be unique within a version set (not enforced) + """ + version: String! 
+ + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + +extend type Mutation { + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): VersionSet + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): VersionSet +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java index 690856263fccc5..c2eb92f4d1cd4c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -56,8 +56,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - String result = resolver.get(mockEnv).get(); - assertEquals(result, TEST_ENTITY_URN); + assertEquals(resolver.get(mockEnv).get().getUrn(), TEST_VERSION_SET_URN); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java index 0000ad24a04537..e162ce96e627c6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -48,7 +48,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - assertTrue(resolver.get(mockEnv).get()); + assertEquals(resolver.get(mockEnv).get(), null); Mockito.verify(mockService) .unlinkVersion( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java new file mode 100644 index 00000000000000..3554df074df698 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java @@ -0,0 +1,294 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.*; +import static org.mockito.ArgumentMatchers.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +import com.linkedin.common.AuditStamp; +import 
com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AndFilterInput; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.generated.SortCriterion; +import com.linkedin.datahub.graphql.generated.SortOrder; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.view.DataHubViewDefinition; +import com.linkedin.view.DataHubViewInfo; +import com.linkedin.view.DataHubViewType; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class VersionsSearchResolverTest { + + private static final String VERSION_SET_URN = "urn:li:versionSet:(my_version_set,dataset)"; + private static final Urn TEST_VIEW_URN = UrnUtils.getUrn("urn:li:dataHubView:test"); + private static final Urn TEST_USER_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + + private static final SearchAcrossEntitiesInput BASIC_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.DATASET), "", 0, 10, null, null, null, null, null); + + private static final SearchAcrossEntitiesInput COMPLEX_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.CHART, EntityType.DATASET), + "query", + 2, + 5, + null, + List.of( + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field1") + .setValues(List.of("1", "2")) + .build(), + FacetFilterInput.builder() + .setField("field2") + .setValues(List.of("a")) + .build())) + .build(), + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field3") + .setValues(List.of("3", "4")) + .build(), + FacetFilterInput.builder() + .setField("field4") + .setValues(List.of("b")) + .build())) + .build()), + TEST_VIEW_URN.toString(), + SearchFlags.builder().setSkipCache(true).build(), + SearchSortInput.builder() + .setSortCriteria( + List.of( + SortCriterion.builder() + .setField("sortField1") + .setSortOrder(SortOrder.DESCENDING) + .build(), + SortCriterion.builder() + .setField("sortField2") + .setSortOrder(SortOrder.ASCENDING) + .build())) + .build()); + + @Test + public void testGetSuccessBasic() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + 
VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("*"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN)))))), + Mockito.eq(0), + Mockito.eq(10), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testGetSuccessComplex() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + + Filter viewFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + List.of(buildCriterion("viewField", Condition.EQUAL, "test")))))); + DataHubViewInfo viewInfo = + new DataHubViewInfo() + .setName("test") + .setType(DataHubViewType.GLOBAL) + .setCreated(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setLastModified(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setDefinition( + new DataHubViewDefinition() + .setEntityTypes( + new StringArray( + List.of( + Constants.DATASET_ENTITY_NAME, Constants.DASHBOARD_ENTITY_NAME))) + .setFilter(viewFilter)); + ViewService mockViewService = Mockito.mock(ViewService.class); + Mockito.when(mockViewService.getViewInfo(any(), Mockito.eq(TEST_VIEW_URN))) + .thenReturn(viewInfo); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(COMPLEX_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions() + && context.getSearchContext().getSearchFlags().isSkipCache()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("query"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new 
ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field1", Condition.EQUAL, "1", "2"), + CriterionUtils.buildCriterion( + "field2", Condition.EQUAL, "a"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test"))), + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field3", Condition.EQUAL, "3", "4"), + CriterionUtils.buildCriterion( + "field4", Condition.EQUAL, "b"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test")))))), + Mockito.eq(2), + Mockito.eq(5), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField1") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField2") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.ASCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testThrowsError() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + + Mockito.when( + mockEntityClient.searchAcrossEntities( + any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any(), any())) + .thenThrow(new RemoteInvocationException()); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + private EntityClient initMockEntityClient() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Mockito.when( + client.searchAcrossEntities( + any(), + any(), + Mockito.anyString(), + any(), + Mockito.anyInt(), + Mockito.anyInt(), + any(), + Mockito.eq(null))) + .thenReturn( + new SearchResult() + .setEntities(new SearchEntityArray()) + .setNumEntities(0) + .setFrom(0) + .setPageSize(0) + .setMetadata(new SearchResultMetadata())); + + return client; + } +} diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 2da9e733eb4072..063b784920e234 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -645,6 +645,7 @@ export const dataset3 = { structuredProperties: null, forms: null, activeIncidents: null, + versionProperties: null, } as Dataset; export const dataset3WithSchema = { diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx index d7fef85db4b625..8ac18d0142b4e9 100644 --- a/datahub-web-react/src/appConfigContext.tsx +++ b/datahub-web-react/src/appConfigContext.tsx @@ -57,6 +57,7 @@ export const DEFAULT_APP_CONFIG = { editableDatasetNameEnabled: false, showSeparateSiblings: false, showManageStructuredProperties: false, + 
entityVersioningEnabled: false, }, }; diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql index 0d1999f82f77cd..c1fe50d7620a3c 100644 --- a/datahub-web-react/src/graphql/app.graphql +++ b/datahub-web-react/src/graphql/app.graphql @@ -72,6 +72,7 @@ query appConfig { editableDatasetNameEnabled showSeparateSiblings showManageStructuredProperties + entityVersioningEnabled } } } diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index fcca919f614235..8bbeb304aae2cc 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -172,6 +172,7 @@ fragment nonSiblingDatasetFields on Dataset { forms { ...formsFields } + ...entityProfileVersionProperties } query getRecentQueries($urn: String!) { diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index 2192888caef701..ad97c7c6f530a1 100644 --- a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -34,5 +34,6 @@ query getMLModel($urn: String!) { forms { ...formsFields } + ...entityProfileVersionProperties } } diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index 1bee614dd7adbe..8000f59f2bf258 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -346,4 +346,9 @@ fragment entityPreview on Entity { ... on Container { ...entityContainer } + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index de7d1befd39b08..9edd6754022866 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -949,6 +949,11 @@ fragment searchResultsWithoutSchemaField on Entity { ... on StructuredPropertyEntity { ...structuredPropertyFields } + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } } fragment searchResultFields on Entity { diff --git a/datahub-web-react/src/graphql/versioning.graphql b/datahub-web-react/src/graphql/versioning.graphql new file mode 100644 index 00000000000000..e9b6b82494b6ed --- /dev/null +++ b/datahub-web-react/src/graphql/versioning.graphql @@ -0,0 +1,89 @@ +fragment versionProperties on VersionProperties { + versionSet { + urn + type + } + isLatest + version { + versionTag + } + aliases { + versionTag + } + comment + created { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } + createdInSource { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } +} + +fragment versionsSearchResults on SearchResults { + count + total + searchResults { + entity { + urn + type + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } + } + } +} + +fragment entityProfileVersionProperties on SupportsVersions { + versionProperties { + ...versionProperties + versionSet { + urn + type + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } + } +} + +query searchAcrossVersions($versionSetUrn: String!, $input: SearchAcrossEntitiesInput!) 
{ + versionSet(urn: $versionSetUrn) { + versionsSearch(input: $input) { + ...versionsSearchResults + } + } +} + +mutation linkAssetVersion($input: LinkVersionInput!) { + linkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} + +mutation unlinkAssetVersion($input: UnlinkVersionInput!) { + unlinkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} From 4de7f61d0924dd66e86c8a31686fdf4e84a474da Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:16 +0530 Subject: [PATCH 02/15] fix(ingest): log exception properly (#12372) --- metadata-ingestion/src/datahub/ingestion/run/pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index ee1c1608cd48c6..ef59ba7a3b58b4 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -76,8 +76,9 @@ def on_failure( failure_metadata: dict, ) -> None: logger.error( - f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}" - f" with {failure_exception} and info {failure_metadata}" + f"{self.name} failed to write record with workunit {record_envelope.metadata['workunit_id']}", + extra={"failure_metadata": failure_metadata}, + exc_info=failure_exception, ) From 76e46b89dbcb0dc12e3524bbbfdf177d5db93473 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 17 Jan 2025 21:38:29 +0530 Subject: [PATCH 03/15] dev(ingest): move modules from isort,flake8 to ruff (#12373) --- .../airflow-plugin/build.gradle | 6 +-- .../airflow-plugin/pyproject.toml | 53 ++++++++++++++++--- .../airflow-plugin/setup.cfg | 21 -------- .../airflow-plugin/setup.py | 4 +- .../src/datahub_airflow_plugin/_config.py | 5 +- .../_datahub_ol_adapter.py | 3 +- .../src/datahub_airflow_plugin/_extractors.py | 23 ++++---- .../client/airflow_generator.py | 2 +- .../datahub_listener.py | 10 ++-- .../datahub_airflow_plugin/datahub_plugin.py | 6 +-- .../datahub_plugin_v22.py | 2 +- .../src/datahub_airflow_plugin/entities.py | 1 + .../example_dags/generic_recipe_sample_dag.py | 1 + .../example_dags/graph_usage_sample_dag.py | 2 +- .../example_dags/lineage_emission_dag.py | 2 +- .../datahub_airflow_plugin/hooks/datahub.py | 2 + .../lineage/_lineage_core.py | 1 - .../operators/datahub.py | 2 +- .../operators/datahub_assertion_operator.py | 2 +- .../operators/datahub_assertion_sensor.py | 2 +- .../operators/datahub_operation_operator.py | 2 +- .../operators/datahub_operation_sensor.py | 2 +- ...hub_emitter_operator_jinja_template_dag.py | 2 +- .../tests/integration/test_plugin.py | 2 +- .../airflow-plugin/tests/unit/test_airflow.py | 2 +- .../tests/unit/test_packaging.py | 2 +- .../dagster-plugin/build.gradle | 6 +-- .../dagster-plugin/pyproject.toml | 52 +++++++++++++++--- .../dagster-plugin/setup.cfg | 21 -------- .../dagster-plugin/setup.py | 5 +- .../client/dagster_generator.py | 2 + .../sensors/datahub_sensors.py | 6 ++- .../dagster-plugin/tests/unit/test_dagster.py | 4 +- .../gx-plugin/pyproject.toml | 3 -- .../prefect-plugin/pyproject.toml | 3 -- metadata-ingestion/pyproject.toml | 12 +---- .../api/entities/dataproduct/dataproduct.py | 2 +- .../datahub/ingestion/source/abs/source.py | 2 +- .../ingestion/source/dremio/dremio_api.py | 
2 +- .../ingestion/source/neo4j/neo4j_source.py | 2 +- .../src/datahub/ingestion/source/s3/source.py | 2 +- .../ingestion/source/schema/json_schema.py | 2 +- .../ingestion/source/sql/clickhouse.py | 2 +- .../ingestion/source/tableau/tableau.py | 2 +- .../src/datahub/testing/mcp_diff.py | 2 +- .../src/datahub/utilities/sqllineage_patch.py | 2 +- .../integration/powerbi/test_m_parser.py | 4 +- .../tests/integration/powerbi/test_powerbi.py | 4 +- .../tests/performance/data_generation.py | 4 +- smoke-test/pyproject.toml | 2 - 50 files changed, 166 insertions(+), 144 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 68a35c0dfc417b..1bcb58e6b7c543 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -74,16 +74,14 @@ task lint(type: Exec, dependsOn: installDev) { "find ${venv_name}/lib -path *airflow/_vendor/connexion/spec.py -exec sed -i.bak -e '169,169s/ # type: List\\[str\\]//g' {} \\; && " + "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ && " + - "isort --check --diff src/ tests/ && " + - "flake8 --count --statistics src/ tests/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black src/ tests/ && " + - "isort src/ tests/ && " + - "flake8 src/ tests/ && " + + "ruff check --fix src/ tests/ && " + "mypy src/ tests/ " } diff --git a/metadata-ingestion-modules/airflow-plugin/pyproject.toml b/metadata-ingestion-modules/airflow-plugin/pyproject.toml index 648040c1951db8..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/airflow-plugin/pyproject.toml +++ b/metadata-ingestion-modules/airflow-plugin/pyproject.toml @@ -10,11 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -known_future_library = ['__future__', 'datahub.utilities._markupsafe_compat', 'datahub_provider._airflow_compat'] -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions = ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. 
+ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/airflow-plugin/setup.cfg b/metadata-ingestion-modules/airflow-plugin/setup.cfg index c25256c5751b8d..abb9040ab3535a 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.cfg +++ b/metadata-ingestion-modules/airflow-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. - E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = sqlmypy, diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index d07063dbffc5c4..2fd74b37e89c05 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -73,9 +73,7 @@ def get_long_description(): *mypy_stubs, "black==22.12.0", "coverage>=5.1", - "flake8>=3.8.3", - "flake8-tidy-imports>=4.3.0", - "isort>=5.7.0", + "ruff==0.9.1", "mypy==1.10.1", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index c4964712cf9f7d..6d6ba601556788 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -1,11 +1,12 @@ from enum import Enum from typing import TYPE_CHECKING, Optional -import datahub.emitter.mce_builder as builder from airflow.configuration import conf -from datahub.configuration.common import AllowDenyPattern, ConfigModel from pydantic.fields import Field +import datahub.emitter.mce_builder as builder +from datahub.configuration.common import AllowDenyPattern, ConfigModel + if TYPE_CHECKING: from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py index 69de61aced0a59..72cdcd8813252a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -1,8 +1,9 @@ import logging -import datahub.emitter.mce_builder as builder from openlineage.client.run import Dataset as OpenLineageDataset +import datahub.emitter.mce_builder as builder + logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index 28d5775f61f542..fd01ac10f98de9 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -3,17 +3,11 @@ import unittest.mock from typing import TYPE_CHECKING, Optional -import 
datahub.emitter.mce_builder as builder -from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( - get_platform_from_sqlalchemy_uri, -) -from datahub.sql_parsing.sqlglot_lineage import ( - SqlParsingResult, - create_lineage_sql_parsed_result, +from openlineage.airflow.extractors import ( + BaseExtractor, + ExtractorManager as OLExtractorManager, + TaskMetadata, ) -from openlineage.airflow.extractors import BaseExtractor -from openlineage.airflow.extractors import ExtractorManager as OLExtractorManager -from openlineage.airflow.extractors import TaskMetadata from openlineage.airflow.extractors.snowflake_extractor import SnowflakeExtractor from openlineage.airflow.extractors.sql_extractor import SqlExtractor from openlineage.airflow.utils import get_operator_class, try_import_from_string @@ -23,11 +17,20 @@ SqlJobFacet, ) +import datahub.emitter.mce_builder as builder +from datahub.ingestion.source.sql.sqlalchemy_uri_mapper import ( + get_platform_from_sqlalchemy_uri, +) +from datahub.sql_parsing.sqlglot_lineage import ( + SqlParsingResult, + create_lineage_sql_parsed_result, +) from datahub_airflow_plugin._airflow_shims import Operator from datahub_airflow_plugin._datahub_ol_adapter import OL_SCHEME_TWEAKS if TYPE_CHECKING: from airflow.models import DagRun, TaskInstance + from datahub.ingestion.graph.client import DataHubGraph logger = logging.getLogger(__name__) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index e9f93c0c1eab0a..c1ccdaeb0a1fbd 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union, cast from airflow.configuration import conf + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, @@ -11,7 +12,6 @@ from datahub.metadata.schema_classes import DataProcessTypeClass from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn - from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index 640991a90a1d28..9de44811f60a48 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -8,9 +8,13 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast import airflow -import datahub.emitter.mce_builder as builder from airflow.models import Variable from airflow.models.serialized_dag import SerializedDagModel +from openlineage.airflow.listener import TaskHolder +from openlineage.airflow.utils import redact_with_exclusions +from openlineage.client.serde import Serde + +import datahub.emitter.mce_builder as builder from datahub.api.entities.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.emitter.mcp import 
MetadataChangeProposalWrapper @@ -30,10 +34,6 @@ ) from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult from datahub.telemetry import telemetry -from openlineage.airflow.listener import TaskHolder -from openlineage.airflow.utils import redact_with_exclusions -from openlineage.client.serde import Serde - from datahub_airflow_plugin._airflow_shims import ( HAS_AIRFLOW_DAG_LISTENER_API, HAS_AIRFLOW_DATASET_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py index 137cf97f69280a..7638720db023ac 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin.py @@ -15,9 +15,9 @@ logger = logging.getLogger(__name__) -_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and not os.getenv( +_USE_AIRFLOW_LISTENER_INTERFACE = HAS_AIRFLOW_LISTENER_API and os.getenv( "DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN", "false" -).lower() in ("true", "1") +).lower() not in ("true", "1") if _USE_AIRFLOW_LISTENER_INTERFACE: try: @@ -32,7 +32,7 @@ with contextlib.suppress(Exception): - if not os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() in ( + if os.getenv("DATAHUB_AIRFLOW_PLUGIN_SKIP_FORK_PATCH", "false").lower() not in ( "true", "1", ): diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index db47f37bed562e..4bf050d41473e4 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -7,9 +7,9 @@ from airflow.lineage import PIPELINE_OUTLETS from airflow.models.baseoperator import BaseOperator from airflow.utils.module_loading import import_string + from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.telemetry import telemetry - from datahub_airflow_plugin._airflow_shims import ( MappedOperator, get_task_inlets, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py index 30b35ac6d6198b..f3fd17259c9f63 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/entities.py @@ -2,6 +2,7 @@ from typing import List, Optional import attr + import datahub.emitter.mce_builder as builder from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.dataset_urn import DatasetUrn diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py index ff8dba457066fd..ac620852c6f288 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/generic_recipe_sample_dag.py @@ -9,6 +9,7 @@ from airflow import DAG from airflow.operators.python import PythonOperator from airflow.utils.dates import days_ago + from 
datahub.configuration.config_loader import load_config_file from datahub.ingestion.run.pipeline import Pipeline diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py index d72ba67c23cd72..7951d6f7fd21ef 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/graph_usage_sample_dag.py @@ -4,8 +4,8 @@ import pendulum from airflow.decorators import dag, task -from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter +from datahub.ingestion.graph.client import DataHubGraph, RemovedStatusFilter from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py index 0d7cdb6b6e90a5..4351f40fe7e3ad 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/example_dags/lineage_emission_dag.py @@ -4,11 +4,11 @@ """ from datetime import timedelta -import datahub.emitter.mce_builder as builder from airflow import DAG from airflow.operators.bash import BashOperator from airflow.utils.dates import days_ago +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py index 5f4d787fb893d3..26c5026c075bd7 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/hooks/datahub.py @@ -2,6 +2,7 @@ from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook + from datahub.emitter.generic_emitter import Emitter from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( @@ -11,6 +12,7 @@ if TYPE_CHECKING: from airflow.models.connection import Connection + from datahub.emitter.kafka_emitter import DatahubKafkaEmitter from datahub.emitter.rest_emitter import DataHubRestEmitter from datahub.emitter.synchronized_file_emitter import SynchronizedFileEmitter diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py index 638458b0efd6ab..db50c48dfaf08a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py @@ -2,7 +2,6 @@ from typing import TYPE_CHECKING, Dict, List from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult - from datahub_airflow_plugin._config import DatahubLineageConfig from datahub_airflow_plugin.client.airflow_generator import AirflowGenerator from datahub_airflow_plugin.entities import ( diff --git 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py index 41d855512aa066..817db6b7480c4b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub.py @@ -3,9 +3,9 @@ from airflow.models import BaseOperator from airflow.utils.decorators import apply_defaults from avrogen.dict_wrapper import DictWrapper + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent - from datahub_airflow_plugin.hooks.datahub import ( DatahubGenericHook, DatahubKafkaHook, diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py index 6f93c09a9e2872..3a440b0ec14e07 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.models import BaseOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py index 16e5d1cbe8b1f4..6a446ba1f3b55e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_assertion_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( AssertionCircuitBreaker, AssertionCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py index 94e105309537b6..eb5fe8168bccf8 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_operator.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py index 434c60754064d0..89e20e46a0074a 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/operators/datahub_operation_sensor.py @@ -2,11 +2,11 @@ from typing import Any, List, Optional, Sequence, Union from airflow.sensors.base import BaseSensorOperator + from datahub.api.circuit_breaker import ( OperationCircuitBreaker, OperationCircuitBreakerConfig, ) - from datahub_airflow_plugin.hooks.datahub import DatahubRestHook diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py index c1b4aa4d7b94f4..04845e601d674d 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/datahub_emitter_operator_jinja_template_dag.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta from airflow import DAG + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.schema_classes import ( @@ -9,7 +10,6 @@ DatasetPropertiesClass, DatasetSnapshotClass, ) - from datahub_airflow_plugin.operators.datahub import DatahubEmitterOperator default_args = { diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py index 3b2c9140e4632f..d2c9821295419c 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/test_plugin.py @@ -17,9 +17,9 @@ import requests import tenacity from airflow.models.connection import Connection + from datahub.ingestion.sink.file import write_metadata_file from datahub.testing.compare_metadata_json import assert_metadata_files_equal - from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_VERSION, HAS_AIRFLOW_DAG_LISTENER_API, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py index 79620f81a437b0..1dc8e14a425dfc 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_airflow.py @@ -8,12 +8,12 @@ import airflow.configuration import airflow.version -import datahub.emitter.mce_builder as builder import packaging.version import pytest from airflow.lineage import apply_lineage, prepare_lineage from airflow.models import DAG, Connection, DagBag, DagRun, TaskInstance +import datahub.emitter.mce_builder as builder from datahub_airflow_plugin import get_provider_info from datahub_airflow_plugin._airflow_shims import ( AIRFLOW_PATCHED, diff --git a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py index a9c8b7ec65fa3c..a822527582c2cd 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/unit/test_packaging.py @@ -1,6 +1,6 @@ import setuptools -from datahub.testing.check_imports import ensure_no_indirect_model_imports +from datahub.testing.check_imports import ensure_no_indirect_model_imports from tests.utils import 
PytestConfig diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 0d57bb5bfdff70..503b3556a41bfe 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -55,16 +55,14 @@ task lint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black --check --diff src/ tests/ examples/ && " + - "isort --check --diff src/ tests/ examples/ && " + - "flake8 --count --statistics src/ tests/ examples/ && " + + "ruff check src/ tests/ && " + "mypy --show-traceback --show-error-codes src/ tests/ examples/" } task lintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-x', '-c', "source ${venv_name}/bin/activate && " + "black src/ tests/ examples/ && " + - "isort src/ tests/ examples/ && " + - "flake8 src/ tests/ examples/ && " + + "ruff check --fix src/ tests/ && " + "mypy src/ tests/ examples/" } diff --git a/metadata-ingestion-modules/dagster-plugin/pyproject.toml b/metadata-ingestion-modules/dagster-plugin/pyproject.toml index fba81486b9f677..7d03c2a14bf078 100644 --- a/metadata-ingestion-modules/dagster-plugin/pyproject.toml +++ b/metadata-ingestion-modules/dagster-plugin/pyproject.toml @@ -10,10 +10,50 @@ extend-exclude = ''' ''' include = '\.pyi?$' -[tool.isort] -indent = ' ' -profile = 'black' -sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' +[tool.ruff.lint.isort] +combine-as-imports = true +known-first-party = ["datahub"] +extra-standard-library = ["__future__", "datahub.utilities._markupsafe_compat", "datahub.sql_parsing._sqlglot_patch"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +force-sort-within-sections = false +force-wrap-aliases = false +split-on-trailing-comma = false +order-by-type = true +relative-imports-order = "closest-to-furthest" +force-single-line = false +single-line-exclusions = ["typing"] +length-sort = false +from-first = false +required-imports = [] +classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file +[tool.ruff.lint] +select = [ + "B", + "C90", + "E", + "F", + "I", # For isort + "TID", +] +ignore = [ + # Ignore line length violations (handled by Black) + "E501", + # Ignore whitespace before ':' (matches Black) + "E203", + # Allow usages of functools.lru_cache + "B019", + # Allow function call in argument defaults + "B008", +] + +[tool.ruff.lint.mccabe] +max-complexity = 15 + +[tool.ruff.lint.flake8-tidy-imports] +# Disallow all relative imports. +ban-relative-imports = "all" + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401"] \ No newline at end of file diff --git a/metadata-ingestion-modules/dagster-plugin/setup.cfg b/metadata-ingestion-modules/dagster-plugin/setup.cfg index 20a903914332aa..89b28ae45f9648 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.cfg +++ b/metadata-ingestion-modules/dagster-plugin/setup.cfg @@ -1,24 +1,3 @@ -[flake8] -max-complexity = 15 -ignore = - # Ignore: line length issues, since black's formatter will take care of them. - E501, - # Ignore: 1 blank line required before class docstring. - D203, - # See https://stackoverflow.com/a/57074416. - W503, - # See https://github.com/psf/black/issues/315. 
- E203 -exclude = - .git, - venv, - .tox, - __pycache__ -per-file-ignores = - # imported but unused - __init__.py: F401 -ban-relative-imports = true - [mypy] plugins = pydantic.mypy diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 22c15497bd8070..f2e90c14833f78 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -53,10 +53,7 @@ def get_long_description(): "dagster-snowflake-pandas >= 0.11.0", "black==22.12.0", "coverage>=5.1", - "flake8>=6.0.0", - "flake8-tidy-imports>=4.3.0", - "flake8-bugbear==23.3.12", - "isort>=5.7.0", + "ruff==0.9.1", "mypy>=1.4.0", # pydantic 1.8.2 is incompatible with mypy 0.910. # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py index a87f490f2d947e..9a0a9a1b3a75ed 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/client/dagster_generator.py @@ -13,6 +13,7 @@ TableSchemaMetadataValue, ) from dagster._core.execution.stats import RunStepKeyStatsSnapshot, StepEventStatus + from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint try: @@ -23,6 +24,7 @@ from dagster._core.snap.node import OpDefSnap from dagster._core.storage.dagster_run import DagsterRun, DagsterRunStatsSnapshot + from datahub.api.entities.datajob import DataFlow, DataJob from datahub.api.entities.dataprocess.dataprocess_instance import ( DataProcessInstance, diff --git a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py index bccdb4ac7922a5..b91a9cfa56d398 100644 --- a/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py +++ b/metadata-ingestion-modules/dagster-plugin/src/datahub_dagster_plugin/sensors/datahub_sensors.py @@ -35,7 +35,9 @@ try: from dagster._core.definitions.sensor_definition import SensorReturnTypesUnion except ImportError: - from dagster._core.definitions.sensor_definition import RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion # type: ignore + from dagster._core.definitions.sensor_definition import ( # type: ignore + RawSensorEvaluationFunctionReturn as SensorReturnTypesUnion, + ) from dagster._core.definitions.target import ExecutableDefinition from dagster._core.definitions.unresolved_asset_job_definition import ( @@ -43,6 +45,7 @@ ) from dagster._core.events import DagsterEventType, HandledOutputData, LoadedInputData from dagster._core.execution.stats import RunStepKeyStatsSnapshot + from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import SubTypesClass @@ -52,7 +55,6 @@ ) from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.error import InvalidUrnError - from datahub_dagster_plugin.client.dagster_generator import ( DATAHUB_ASSET_GROUP_NAME_CACHE, Constant, diff --git a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py 
index c951b959f85d43..9a69822984bb80 100644 --- a/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py +++ b/metadata-ingestion-modules/dagster-plugin/tests/unit/test_dagster.py @@ -22,11 +22,11 @@ RepositoryDefinition, ) from dagster._core.definitions.resource_definition import ResourceDefinition -from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DatahubClientConfig from freezegun import freeze_time from utils.utils import PytestConfig, check_golden_file +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DatahubClientConfig from datahub_dagster_plugin.client.dagster_generator import DatahubDagsterSourceConfig from datahub_dagster_plugin.sensors.datahub_sensors import ( DatahubSensors, diff --git a/metadata-ingestion-modules/gx-plugin/pyproject.toml b/metadata-ingestion-modules/gx-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/gx-plugin/pyproject.toml +++ b/metadata-ingestion-modules/gx-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion-modules/prefect-plugin/pyproject.toml b/metadata-ingestion-modules/prefect-plugin/pyproject.toml index fba81486b9f677..bc951452175268 100644 --- a/metadata-ingestion-modules/prefect-plugin/pyproject.toml +++ b/metadata-ingestion-modules/prefect-plugin/pyproject.toml @@ -14,6 +14,3 @@ include = '\.pyi?$' indent = ' ' profile = 'black' sections = 'FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER' - -[tool.pyright] -extraPaths = ['tests'] \ No newline at end of file diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index f3a51e135082ee..745547f88bcb93 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -11,6 +11,7 @@ extend-exclude = ''' include = '\.pyi?$' target-version = ['py38', 'py39', 'py310', 'py311'] + [tool.ruff.lint.isort] combine-as-imports = true known-first-party = ["datahub"] @@ -28,16 +29,6 @@ from-first = false required-imports = [] classes = ["typing"] -[tool.pyright] -extraPaths = ['tests'] - -[tool.vulture] -exclude = ["src/datahub/metadata/"] -ignore_decorators = ["@click.*", "@validator", "@root_validator", "@pydantic.validator", "@pydantic.root_validator", "@pytest.fixture"] -ignore_names = ["*Source", "*Sink", "*Report"] -paths = ["src"] -sort_by_size = true - [tool.ruff] # Same as Black. 
line-length = 88 @@ -70,7 +61,6 @@ ignore = [ "B008", # TODO: Enable these later "B006", # Mutable args - "B007", # Unused loop control variable "B017", # Do not assert blind exception "B904", # Checks for raise statements in exception handlers that lack a from clause ] diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 2097922c151366..39de4d7f80558e 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -438,7 +438,7 @@ def _patch_ownership( for replace_index, replace_value in patches_replace.items(): list_to_manipulate[replace_index] = replace_value - for drop_index, drop_value in patches_drop.items(): + for drop_value in patches_drop.values(): list_to_manipulate.remove(drop_value) for add_value in patches_add: diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py index ad2bc36cf558b5..e4f9cd0ee7e018 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/abs/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/abs/source.py @@ -613,7 +613,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from self.ingest_table(table_data, path_spec) def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index d913b7e42065d2..072995c10ebcef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -181,7 +181,7 @@ def authenticate(self, connection_args: "DremioSourceConfig") -> None: return # On-prem Dremio authentication (PAT or Basic Auth) - for retry in range(1, self._retry_count + 1): + for _ in range(1, self._retry_count + 1): try: if connection_args.authentication_method == "PAT": self.session.headers.update( diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py index 2c9107b967e4f8..8cdd4b17733e01 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py @@ -286,7 +286,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: df = self.get_neo4j_metadata( "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key RETURN key, value[key] AS value;" ) - for index, row in df.iterrows(): + for _, row in df.iterrows(): try: yield MetadataWorkUnit( id=row["key"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 989d0d734352a2..3173423f86a2ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -1124,7 +1124,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: table_data.table_path ].timestamp = table_data.timestamp - for guid, table_data in table_dict.items(): + for _, table_data in table_dict.items(): yield from 
self.ingest_table(table_data, path_spec) if not self.source_config.is_profiling_enabled(): diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py index 635e894d18c7e5..a50e99393fdc27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py @@ -354,7 +354,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}" if os.path.isdir(self.config.path): - for root, dirs, files in os.walk(self.config.path, topdown=False): + for root, _, files in os.walk(self.config.path, topdown=False): for file_name in [f for f in files if f.endswith(".json")]: try: yield from self._load_one_file( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index aeb21e88d04437..2899bcc2de37b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -268,7 +268,7 @@ def _get_table_or_view_names(self, relkind, connection, schema=None, **kw): info_cache = kw.get("info_cache") all_relations = self._get_all_relation_info(connection, info_cache=info_cache) relation_names = [] - for key, relation in all_relations.items(): + for _, relation in all_relations.items(): if relation.database == schema and relation.relkind == relkind: relation_names.append(relation.relname) return relation_names diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index ee841a2a201863..8187fff559208e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -3605,7 +3605,7 @@ def emit_project_in_topological_order( parent_container_key=parent_project_key, ) - for id_, project in self.tableau_project_registry.items(): + for project in self.tableau_project_registry.values(): logger.debug( f"project {project.name} and it's parent {project.parent_name} and parent id {project.parent_id}" ) diff --git a/metadata-ingestion/src/datahub/testing/mcp_diff.py b/metadata-ingestion/src/datahub/testing/mcp_diff.py index 5e669a718e9ad3..b58afc10148edc 100644 --- a/metadata-ingestion/src/datahub/testing/mcp_diff.py +++ b/metadata-ingestion/src/datahub/testing/mcp_diff.py @@ -246,7 +246,7 @@ def pretty(self, verbose: bool = False) -> str: for urn in self.aspect_changes.keys() - self.urns_added - self.urns_removed: aspect_map = self.aspect_changes[urn] s.append(f"Urn changed, {urn}:") - for aspect_name, aspect_diffs in aspect_map.items(): + for aspect_diffs in aspect_map.values(): for i, ga in aspect_diffs.aspects_added.items(): s.append(self.report_aspect(ga, i, "added")) if verbose: diff --git a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py index afcd02478ae687..4c237d02727f72 100644 --- a/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py +++ b/metadata-ingestion/src/datahub/utilities/sqllineage_patch.py @@ -8,7 +8,7 @@ # Patch based on sqllineage v1.3.3 def end_of_query_cleanup_patch(self, holder: SubQueryLineageHolder) -> None: # type: ignore - for i, tbl in enumerate(self.tables): + for tbl 
in self.tables: holder.add_read(tbl) self.union_barriers.append((len(self.columns), len(self.tables))) diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index 832d00d9c54702..6f7a9c7833ba1a 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -1070,7 +1070,7 @@ def test_unsupported_data_platform(): ) # type :ignore is_entry_present: bool = False - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "Non-Data Platform Expression": is_entry_present = True break @@ -1163,7 +1163,7 @@ def test_m_query_timeout(mock_get_lark_parser): ) # type :ignore is_entry_present: bool = False - for key, entry in warn_entries.items(): + for entry in warn_entries.values(): if entry.title == "M-Query Parsing Timeout": is_entry_present = True break diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 739be7cc8408dd..911d8a9f35139f 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -1438,7 +1438,7 @@ def test_powerbi_cross_workspace_reference_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "Missing Lineage For Tile": is_entry_present = True break @@ -1563,7 +1563,7 @@ def test_powerbi_app_ingest_info_message( is_entry_present: bool = False # Printing INFO entries - for key, entry in info_entries.items(): + for entry in info_entries.values(): if entry.title == "App Ingestion Is Disabled": is_entry_present = True break diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index fcff13edf59363..266c0d9af03224 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -198,7 +198,7 @@ def generate_queries( all_tables = seed_metadata.tables + seed_metadata.views users = [f"user_{i}@xyz.com" for i in range(num_users)] - for i in range(num_selects): # Pure SELECT statements + for _ in range(num_selects): # Pure SELECT statements tables = _sample_list(all_tables, tables_per_select) all_columns = [ FieldAccess(column, table) for table in tables for column in table.columns @@ -213,7 +213,7 @@ def generate_queries( fields_accessed=_sample_list(all_columns, columns_per_select), ) - for i in range(num_operations): + for _ in range(num_operations): modified_table = random.choice(seed_metadata.tables) n_col = len(modified_table.columns) num_columns_modified = NormalDistribution(n_col / 2, n_col / 2) diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml index c7745d0e9a3640..aeb3c03b6466dd 100644 --- a/smoke-test/pyproject.toml +++ b/smoke-test/pyproject.toml @@ -42,5 +42,3 @@ warn_unused_configs = true disallow_incomplete_defs = false disallow_untyped_defs = false -[tool.pyright] -extraPaths = ['tests'] From 0c597d35af83e09b3ca4f310bbe2cbab0c44eda3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Fri, 17 Jan 2025 18:15:56 +0000 Subject: [PATCH 04/15] feat(docs): Add release docs for 0.15.0 (#12374) --- .../ingest/source/builder/NameSourceStep.tsx | 2 +- docs/api/datahub-apis.md | 4 ++ docs/how/updating-datahub.md | 60 ++++++++++--------- 
gradle/versioning/versioning.gradle | 2 +- 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 898fbd6a6d9268..68e6c8d3436fb9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -200,7 +200,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} onBlur={(event) => handleBlur(event, setVersion)} diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index c46aacde3a0cb5..62136406e6ff66 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -12,6 +12,10 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. +:::warning +About async usage of APIs: DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to producing directly to the MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. +::: + ## Python and Java SDK We offer an SDK for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are the examples of how to use the SDKs: diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index eb5a792216d981..b887ca999c4046 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,8 +1,3 @@ -# Known Issues - -- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. - - # Updating DataHub