From 41a4a6cddf1b8838e99b3533439b8bd9f15bd9d0 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 31 Dec 2024 14:12:43 -0600 Subject: [PATCH] feat(entityVersioning): support search flag for latest version filtering --- .../aspect/CachingAspectRetriever.java | 6 + .../patch/template/AspectTemplateEngine.java | 5 +- .../metadata/entity/SearchRetriever.java | 32 ++- .../java/com/linkedin/metadata/Constants.java | 1 + .../EntityVersioningServiceImpl.java | 9 +- .../search/SearchServiceSearchRetriever.java | 17 +- .../metadata/search/utils/ESUtils.java | 36 +++ .../EntityVersioningServiceTest.java | 17 +- .../AutocompleteRequestHandlerTest.java | 156 +++++++++++ .../request/SearchRequestHandlerTest.java | 242 +++++++++++++++++- .../com/linkedin/common/VersionProperties.pdl | 4 + .../linkedin/metadata/query/SearchFlags.pdl | 5 + .../openapi/v3/OpenAPIV3Generator.java | 6 +- ...com.linkedin.entity.entities.snapshot.json | 6 + 14 files changed, 511 insertions(+), 31 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java index 375dd8cf8911e1..7b3233921d039e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/CachingAspectRetriever.java @@ -29,6 +29,12 @@ public Map> getLatestSystemAspects( return Collections.emptyMap(); } + @Nonnull + @Override + public Map entityExists(Set urns) { + return Collections.emptyMap(); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index d1639ab59079f5..821dad13aa0c3c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.aspect.patch.template; +import static com.linkedin.metadata.Constants.*; + import com.fasterxml.jackson.core.JsonProcessingException; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.models.AspectSpec; @@ -12,9 +14,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -import static com.linkedin.metadata.Constants.*; - - /** * Holds connection between aspect specs and their templates and drives the generation from * templates diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java index be17485854ba79..19dc89d26cb1af 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.entity; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -11,6 +12,28 @@ import javax.annotation.Nullable; public interface SearchRetriever { + + SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + SearchFlags RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(true) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false) + .setFilterNonLatestVersions(false); + /** * Allows for configuring the sort, should only be used when sort specified is unique. More often * the default is desirable to just use the urnSort @@ -20,7 +43,8 @@ ScrollResult scroll( @Nullable Filter filters, @Nullable String scrollId, int count, - List sortCriteria); + List sortCriteria, + @Nullable SearchFlags searchFlags); /** * Returns search results for the given entities, filtered and sorted. @@ -39,7 +63,8 @@ default ScrollResult scroll( SortCriterion urnSort = new SortCriterion(); urnSort.setField("urn"); urnSort.setOrder(SortOrder.ASCENDING); - return scroll(entities, filters, scrollId, count, ImmutableList.of(urnSort)); + return scroll( + entities, filters, scrollId, count, ImmutableList.of(urnSort), RETRIEVER_SEARCH_FLAGS); } SearchRetriever EMPTY = new EmptySearchRetriever(); @@ -52,7 +77,8 @@ public ScrollResult scroll( @Nullable Filter filters, @Nullable String scrollId, int count, - List sortCriteria) { + List sortCriteria, + @Nullable SearchFlags searchFlags) { ScrollResult empty = new ScrollResult(); empty.setEntities(new SearchEntityArray()); empty.setNumEntities(0); diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 2f7b27261ce59d..463376edcdf259 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -473,6 +473,7 @@ public class Constants { // Versioning related public static final String INITIAL_VERSION_SORT_ID = "AAAAAAAA"; public static final String VERSION_SORT_ID_FIELD_NAME = "versionSortId"; + public static final String IS_LATEST_FIELD_NAME = "isLatest"; public static final String DISPLAY_PROPERTIES_ASPECT_NAME = "displayProperties"; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java index 167f79bf184947..ab659a5b551618 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceImpl.java @@ -193,7 +193,7 @@ public List linkLatestVersion( return entityService.ingestProposal( opContext, AspectsBatchImpl.builder() - .mcps(proposals, opContext.getAuditStamp(), opContext.getRetrieverContext().get()) + .mcps(proposals, opContext.getAuditStamp(), opContext.getRetrieverContext()) .build(), false); } @@ -243,7 +243,7 @@ public List unlinkVersion( EntityKeyUtils.convertUrnToEntityKey( versionSetUrn, opContext.getEntityRegistryContext().getKeyAspectSpec(versionSetUrn)); - SearchRetriever searchRetriever = opContext.getRetrieverContext().get().getSearchRetriever(); + SearchRetriever searchRetriever = opContext.getRetrieverContext().getSearchRetriever(); // Find current latest version and previous ScrollResult linkedVersions = @@ -257,7 +257,8 @@ public List unlinkVersion( ImmutableList.of( new SortCriterion() .setField(VERSION_SORT_ID_FIELD_NAME) - .setOrder(SortOrder.DESCENDING))); + .setOrder(SortOrder.DESCENDING)), + SearchRetriever.RETRIEVER_SEARCH_FLAGS_NO_CACHE_ALL_VERSIONS); String updatedLatestVersionUrn = null; SearchEntityArray linkedEntities = linkedVersions.getEntities(); @@ -335,7 +336,7 @@ public List unlinkVersion( .mcps( ImmutableList.of(versionSetPropertiesProposal), opContext.getAuditStamp(), - opContext.getRetrieverContext().get()) + opContext.getRetrieverContext()) .build(), false); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java index eda9e9e3a73157..dae119beec4a7e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -8,6 +8,7 @@ import io.datahubproject.metadata.context.OperationContext; import java.util.ArrayList; import java.util.List; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -17,15 +18,6 @@ @Getter @Builder public class SearchServiceSearchRetriever implements SearchRetriever { - private static final SearchFlags RETRIEVER_SEARCH_FLAGS = - new SearchFlags() - .setFulltext(false) - .setMaxAggValues(20) - .setSkipCache(false) - .setSkipAggregates(true) - .setSkipHighlighting(true) - .setIncludeSoftDeleted(false) - .setIncludeRestricted(false); @Setter private OperationContext systemOperationContext; private final SearchService searchService; @@ -36,7 +28,8 @@ public ScrollResult scroll( @Nullable Filter filters, @Nullable String scrollId, int count, - List sortCriteria) { + List sortCriteria, + @Nullable SearchFlags searchFlags) { List finalCriteria = new ArrayList<>(sortCriteria); if (sortCriteria.stream().noneMatch(sortCriterion -> "urn".equals(sortCriterion.getField()))) { SortCriterion urnSort = new SortCriterion(); @@ -44,8 +37,10 @@ public ScrollResult scroll( urnSort.setOrder(SortOrder.ASCENDING); finalCriteria.add(urnSort); } + final SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(RETRIEVER_SEARCH_FLAGS); return searchService.scrollAcrossEntities( - systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + systemOperationContext.withSearchFlags(flags -> finalSearchFlags), entities, "*", filters, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 17bbbaf059dec4..95fff81d13957c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -20,10 +20,12 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.utils.CriterionUtils; import io.datahubproject.metadata.context.OperationContext; import java.util.Collections; import java.util.HashMap; @@ -188,6 +190,13 @@ public static BoolQueryBuilder buildFilterQuery( }); finalQueryBuilder.should(andQueryBuilder); } + if (Boolean.TRUE.equals( + opContext.getSearchContext().getSearchFlags().isFilterNonLatestVersions())) { + BoolQueryBuilder filterNonLatestVersions = + ESUtils.buildFilterNonLatestEntities( + opContext, queryFilterRewriteChain, searchableFieldTypes); + finalQueryBuilder.must(filterNonLatestVersions); + } if (!finalQueryBuilder.should().isEmpty()) { finalQueryBuilder.minimumShouldMatch(1); } @@ -869,4 +878,31 @@ private static void filterSoftDeletedByDefault( } } } + + public static BoolQueryBuilder buildFilterNonLatestEntities( + OperationContext opContext, + QueryFilterRewriteChain queryFilterRewriteChain, + Map> searchableFieldTypes) { + ConjunctiveCriterion isLatestCriterion = new ConjunctiveCriterion(); + CriterionArray isLatestCriterionArray = new CriterionArray(); + isLatestCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EQUAL, "true")); + isLatestCriterion.setAnd(isLatestCriterionArray); + BoolQueryBuilder isLatest = + ESUtils.buildConjunctiveFilterQuery( + isLatestCriterion, false, searchableFieldTypes, opContext, queryFilterRewriteChain); + ConjunctiveCriterion isNotVersionedCriterion = new ConjunctiveCriterion(); + CriterionArray isNotVersionedCriterionArray = new CriterionArray(); + isNotVersionedCriterionArray.add( + CriterionUtils.buildCriterion(IS_LATEST_FIELD_NAME, Condition.EXISTS, true)); + isNotVersionedCriterion.setAnd(isNotVersionedCriterionArray); + BoolQueryBuilder isNotVersioned = + ESUtils.buildConjunctiveFilterQuery( + isNotVersionedCriterion, + false, + searchableFieldTypes, + opContext, + queryFilterRewriteChain); + return QueryBuilders.boolQuery().should(isLatest).should(isNotVersioned).minimumShouldMatch(1); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java index 40eb4288424491..f0ca66a06aec33 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/versioning/EntityVersioningServiceTest.java @@ -276,7 +276,8 @@ public void testUnlinkInitialVersion() throws Exception { ScrollResult scrollResult = new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); - when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any())).thenReturn(scrollResult); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); // Execute List results = @@ -292,7 +293,7 @@ public void testUnlinkInitialVersion() throws Exception { eq(VERSION_PROPERTIES_ASPECT_NAME), anyMap(), eq(true)); - verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any()); + verify(mockSearchRetriever, never()).scroll(any(), any(), anyString(), anyInt(), any(), any()); } @Test @@ -328,7 +329,8 @@ public void testUnlinkLatestVersionWithPriorVersion() throws Exception { ScrollResult scrollResult = new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); - when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any())).thenReturn(scrollResult); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); // Mock delete aspect response RollbackResult versionPropsDeleteResult = @@ -397,7 +399,8 @@ public void testUnlinkNotLatestVersionWithPriorVersion() throws Exception { ScrollResult scrollResult = new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); - when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any())).thenReturn(scrollResult); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); // Mock delete aspect response RollbackResult versionPropsDeleteResult = @@ -468,7 +471,8 @@ public void testUnlinkNotReturnedSingleVersionWithPriorVersion() throws Exceptio ScrollResult scrollResult = new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); - when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any())).thenReturn(scrollResult); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); // Mock delete aspect response RollbackResult versionPropsDeleteResult = @@ -537,7 +541,8 @@ public void testUnlinkNotReturnedDoubleVersionWithPriorVersion() throws Exceptio ScrollResult scrollResult = new ScrollResult().setEntities(relatedEntities).setMetadata(new SearchResultMetadata()); - when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any())).thenReturn(scrollResult); + when(mockSearchRetriever.scroll(any(), any(), any(), eq(2), any(), any())) + .thenReturn(scrollResult); // Mock delete aspect response RollbackResult versionPropsDeleteResult = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index c5205906e9d373..23d493b7287f78 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -1,9 +1,12 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; @@ -13,22 +16,35 @@ import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.opensearch.action.search.SearchRequest; import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import org.opensearch.search.builder.SearchSourceBuilder; @@ -40,6 +56,8 @@ public class AutocompleteRequestHandlerTest { private static AutocompleteRequestHandler handler; private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + private OperationContext nonMockOpContext = + TestOperationContexts.systemContextNoSearchAuthorization(); static { testQueryConfig = new SearchConfiguration(); @@ -465,10 +483,148 @@ public void testCustomConfigWithFunctionScores() { assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.size() == 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + nonMockOpContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + // bool -> filter -> [bool] -> must -> [bool] + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private static QueryBuilder extractNestedQuery(BoolQueryBuilder nested) { assertEquals(nested.should().size(), 1); BoolQueryBuilder firstLevel = (BoolQueryBuilder) nested.should().get(0); assertEquals(firstLevel.should().size(), 1); return firstLevel.should().get(0); } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + AutocompleteRequestHandler requestHandler = + AutocompleteRequestHandler.getBuilder( + entitySpec, + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY, + testQueryConfig); + + return (BoolQueryBuilder) + ((FunctionScoreQueryBuilder) + requestHandler + .getSearchRequest( + mockOpContext.withSearchFlags( + flags -> + flags + .setFulltext(false) + .setFilterNonLatestVersions(filterNonLatest)), + "", + "platform", + filter, + 3) + .source() + .query()) + .query(); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1a91ae35c6595b..da8d2b988621ef 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; @@ -56,6 +57,8 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -805,7 +808,214 @@ public void testQueryByDefault() { } } + @Test + public void testFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.filter().stream() + .filter(filter -> filter instanceof BoolQueryBuilder) + .flatMap(filter -> ((BoolQueryBuilder) filter).must().stream()) + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + + @Test + public void testAggregationFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + true); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertEquals(isLatestQueries.size(), 2, "Expected to find two queries"); + final TermQueryBuilder termQueryBuilder = (TermQueryBuilder) isLatestQueries.get(0); + assertEquals(termQueryBuilder.fieldName(), "isLatest"); + Set values = new HashSet<>(); + values.add((Boolean) termQueryBuilder.value()); + + assertEquals(values.size(), 1, "Expected only true value."); + assertTrue(values.contains(true)); + final ExistsQueryBuilder existsQueryBuilder = (ExistsQueryBuilder) isLatestQueries.get(1); + assertEquals(existsQueryBuilder.fieldName(), "isLatest"); + } + + @Test + public void testAggregationNoFilterLatestVersions() { + final Criterion filterCriterion = + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); + + final BoolQueryBuilder testQuery = + getAggregationQuery( + filterCriterion, + operationContext.getEntityRegistry().getEntitySpec(DATASET_ENTITY_NAME), + false); + + List isLatestQueries = + testQuery.must().stream() + .filter(must -> must instanceof BoolQueryBuilder) + .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) + .filter(should -> should instanceof BoolQueryBuilder) + .flatMap( + should -> { + BoolQueryBuilder boolShould = (BoolQueryBuilder) should; + + // Get isLatest: true term queries + Stream filterQueries = + boolShould.filter().stream() + .filter( + f -> + f instanceof TermQueryBuilder + && ((TermQueryBuilder) f).fieldName().equals("isLatest")); + + // Get isLatest exists queries + Stream existsQueries = + boolShould.mustNot().stream() + .filter(mn -> mn instanceof BoolQueryBuilder) + .flatMap(mn -> ((BoolQueryBuilder) mn).must().stream()) + .filter( + mq -> + mq instanceof ExistsQueryBuilder + && ((ExistsQueryBuilder) mq).fieldName().equals("isLatest")); + + return Stream.concat(filterQueries, existsQueries); + }) + .collect(Collectors.toList()); + + assertTrue(isLatestQueries.isEmpty(), "Expected to find no queries"); + } + private BoolQueryBuilder getQuery(final Criterion filterCriterion) { + return getQuery(filterCriterion, TestEntitySpecBuilder.getSpec(), true); + } + + private BoolQueryBuilder getQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { final Filter filter = new Filter() .setOr( @@ -824,7 +1034,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { return (BoolQueryBuilder) requestHandler .getSearchRequest( - operationContext.withSearchFlags(flags -> flags.setFulltext(false)), + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), "", filter, null, @@ -834,4 +1045,33 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .source() .query(); } + + private BoolQueryBuilder getAggregationQuery( + final Criterion filterCriterion, final EntitySpec entitySpec, boolean filterNonLatest) { + final Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); + + final SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder( + operationContext.getEntityRegistry(), + entitySpec, + testQueryConfig, + null, + QueryFilterRewriteChain.EMPTY); + + return (BoolQueryBuilder) + requestHandler + .getAggregationRequest( + operationContext.withSearchFlags( + flags -> flags.setFulltext(false).setFilterNonLatestVersions(filterNonLatest)), + "platform", + filter, + 10) + .source() + .query(); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl index 49f56d18a54e94..6c2e444ad753ee 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/VersionProperties.pdl @@ -69,5 +69,9 @@ record VersionProperties { /** * Marks whether this version is currently the latest. Set by a side effect and should not be modified by API. */ + @Searchable = { + "queryByDefault": false, + "fieldType": "BOOLEAN" + } isLatest: optional boolean } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index a3a7a8cda58a8d..ab5873452641ed 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -64,4 +64,9 @@ record SearchFlags { * By default we include these, but custom aggregation requests don't need them. */ includeDefaultFacets: optional boolean = true + + /** + * Include only latest versions in version sets, default true + */ + filterNonLatestVersions: optional boolean = true } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 223a1701478571..f7764f2ddb39a1 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -188,7 +188,8 @@ public static OpenAPI generateOpenApiSpec( // --> Aspect Paths definedEntitySpecs.forEach( - e -> e.getAspectSpecs().stream() + e -> + e.getAspectSpecs().stream() .filter(a -> definitionNames.contains(a.getName())) .sorted(Comparator.comparing(AspectSpec::getName)) .forEach( @@ -209,8 +210,7 @@ public static OpenAPI generateOpenApiSpec( String.format( "/v3/entity/%s/{urn}/%s", e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath( - e, a.getName(), a.getPegasusSchema().getName())))); + buildSingleEntityAspectPath(e, a)))); // --> Link & Unlink APIs if (configurationProvider.getFeatureFlags().isEntityVersioning()) { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 0a91ac3306b7d2..f58d83dd1e5cb7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6143,6 +6143,12 @@ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.", "default" : true, "optional" : true + }, { + "name" : "filterNonLatestVersions", + "type" : "boolean", + "doc" : "Include only latest versions in version sets, default true", + "default" : true, + "optional" : true } ] }, { "type" : "enum",