Skip to content

Commit

Permalink
fix(structuredProperties): fixes underscore behavior in structured pr…
Browse files Browse the repository at this point in the history
…operty names (datahub-project#11746)

Co-authored-by: david-leifker <[email protected]>
  • Loading branch information
RyanHolstien and david-leifker authored Oct 30, 2024
1 parent 143fc01 commit cf3b08f
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX;

import com.google.common.collect.ImmutableSet;
import com.linkedin.common.Status;
Expand Down Expand Up @@ -120,12 +119,13 @@ public static Optional<String> toStructuredPropertyFacetName(
lookupDefinitionFromFilterOrFacetName(
@Nonnull String fieldOrFacetName, @Nullable AspectRetriever aspectRetriever) {
if (fieldOrFacetName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) {
String fqn =
fieldOrFacetName
.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)
.replace(".keyword", "")
.replace(".delimited", "");
// Coming in from the UI this is structuredProperties.<FQN> + any particular specifier for
// subfield (.keyword etc)
String fqn = fieldOrFacetToFQN(fieldOrFacetName);

// FQN Maps directly to URN with urn:li:structuredProperties:FQN
Urn urn = toURNFromFQN(fqn);

Map<Urn, Map<String, Aspect>> result =
Objects.requireNonNull(aspectRetriever)
.getLatestAspectObjects(
Expand Down Expand Up @@ -223,9 +223,8 @@ public static void validateStructuredPropertyFQN(
* @param fqn structured property's fqn
* @return the expected structured property urn
*/
private static Urn toURNFromFQN(@Nonnull String fqn) {
return UrnUtils.getUrn(
String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn.replace('_', '.')));
public static Urn toURNFromFQN(@Nonnull String fqn) {
return UrnUtils.getUrn(String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn));
}

public static void validateFilter(
Expand All @@ -235,12 +234,13 @@ public static void validateFilter(
return;
}

Set<String> fieldNames = new HashSet<>();
Set<String> fqns = new HashSet<>();

if (filter.getCriteria() != null) {
for (Criterion c : filter.getCriteria()) {
if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
fieldNames.add(stripStructuredPropertyPrefix(c.getField()));
String fqn = fieldOrFacetToFQN(c.getField());
fqns.add(fqn);
}
}
}
Expand All @@ -249,24 +249,23 @@ public static void validateFilter(
for (ConjunctiveCriterion cc : filter.getOr()) {
for (Criterion c : cc.getAnd()) {
if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
fieldNames.add(stripStructuredPropertyPrefix(c.getField()));
String fqn = fieldOrFacetToFQN(c.getField());
fqns.add(fqn);
}
}
}
}

if (!fieldNames.isEmpty()) {
validateStructuredPropertyFQN(fieldNames, Objects.requireNonNull(aspectRetriever));
if (!fqns.isEmpty()) {
validateStructuredPropertyFQN(fqns, Objects.requireNonNull(aspectRetriever));
}
}

private static String stripStructuredPropertyPrefix(String s) {
if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX)) {
return s.substring(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.length() + 1).split("[.]")[0];
} else if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
return s.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1).split("[.]")[0];
}
return s;
private static String fieldOrFacetToFQN(String fieldOrFacet) {
return fieldOrFacet
.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)
.replace(".keyword", "")
.replace(".delimited", "");
}

public static Date toDate(PrimitivePropertyValue value) throws DateTimeParseException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public class AggregationQueryBuilderTest {
public void setup() throws RemoteInvocationException, URISyntaxException {
Urn helloUrn = Urn.createFromString("urn:li:structuredProperty:hello");
Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten");
Urn underscoresAndDotsUrn =
Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess");

// legacy
aspectRetriever = mock(AspectRetriever.class);
Expand Down Expand Up @@ -75,6 +77,21 @@ public void setup() throws RemoteInvocationException, URISyntaxException {
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinition.data()))));

StructuredPropertyDefinition structPropUnderscoresAndDotsDefinition =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL);
structPropUnderscoresAndDotsDefinition.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinition.setQualifiedName("under.scores.and.dots_make_a_mess");
structPropUnderscoresAndDotsDefinition.setDisplayName("under.scores.and.dots_make_a_mess");
when(aspectRetriever.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinition.data()))));

// V1
aspectRetrieverV1 = mock(AspectRetriever.class);
when(aspectRetrieverV1.getEntityRegistry())
Expand Down Expand Up @@ -105,6 +122,21 @@ public void setup() throws RemoteInvocationException, URISyntaxException {
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinitionV1.data()))));

StructuredPropertyDefinition structPropUnderscoresAndDotsDefinitionV1 =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinitionV1.setVersion("00000000000001");
structPropUnderscoresAndDotsDefinitionV1.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinitionV1.setQualifiedName("under.scores.and.dots_make_a_mess");
structPropUnderscoresAndDotsDefinitionV1.setDisplayName("under.scores.and.dots_make_a_mess");
when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinitionV1.data()))));
}

@Test
Expand Down Expand Up @@ -269,6 +301,46 @@ public void testAggregateOverStructuredProperty() {
Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword"));
}

@Test
public void testAggregateOverStructuredPropertyNamespaced() {
SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));

List<AggregationBuilder> aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
Assert.assertEquals(aggs.size(), 1);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
// Check that field name is sanitized to correct field name
Assert.assertEquals(
agg.field(),
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
"Terms aggregate must be on a keyword or subfield keyword");

// Two structured properties
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties.hello"));
Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
"structuredProperties.hello.keyword"));
}

@Test
public void testAggregateOverStructuredPropertyV1() {
SearchConfiguration config = new SearchConfiguration();
Expand Down Expand Up @@ -309,6 +381,46 @@ public void testAggregateOverStructuredPropertyV1() {
"structuredProperties._versioned.hello.00000000000001.string.keyword"));
}

@Test
public void testAggregateOverStructuredPropertyNamespacedV1() {
SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));

List<AggregationBuilder> aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
Assert.assertEquals(aggs.size(), 1);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
// Check that field name is sanitized to correct field name
Assert.assertEquals(
agg.field(),
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
"Terms aggregation must be on a keyword field or subfield.");

// Two structured properties
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties._versioned.hello.00000000000001.string"));
Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
"structuredProperties._versioned.hello.00000000000001.string.keyword"));
}

@Test
public void testAggregateOverFieldsAndStructProp() {
SearchableAnnotation annotation1 =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
package com.linkedin.metadata.search.query.request;

import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES;
import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME;
import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion;
import static com.linkedin.metadata.utils.SearchUtil.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.testng.Assert.*;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.DataMap;
import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.graphql.generated.EntityType;
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
import com.linkedin.entity.Aspect;
import com.linkedin.metadata.TestEntitySpecBuilder;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.config.search.ExactMatchConfiguration;
import com.linkedin.metadata.config.search.PartialConfiguration;
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.config.search.WordGramConfiguration;
import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.StructuredPropertyUtils;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
Expand All @@ -28,6 +38,8 @@
import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain;
import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RetrieverContext;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -633,6 +645,49 @@ public void testBrowsePathQueryFilter() {
assertEquals(((ExistsQueryBuilder) mustHaveV1.must().get(0)).fieldName(), "browsePaths");
}

@Test(expectedExceptions = IllegalArgumentException.class)
public void testInvalidStructuredProperty() {
AspectRetriever aspectRetriever = mock(AspectRetriever.class);
Map<Urn, Map<String, Aspect>> aspectResponse = new HashMap<>();
DataMap statusData = new DataMap();
statusData.put("removed", true);
Aspect status = new Aspect(statusData);
Urn structPropUrn = StructuredPropertyUtils.toURNFromFQN("under.scores.and.dots.make_a_mess");
aspectResponse.put(structPropUrn, ImmutableMap.of(STATUS_ASPECT_NAME, status));
when(aspectRetriever.getLatestAspectObjects(
Collections.singleton(structPropUrn), ImmutableSet.of(STATUS_ASPECT_NAME)))
.thenReturn(aspectResponse);
OperationContext mockRetrieverContext =
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());

Criterion structuredPropCriterion =
buildExistsCriterion("structuredProperties.under.scores.and.dots.make_a_mess");

CriterionArray criterionArray = new CriterionArray();
criterionArray.add(structuredPropCriterion);

ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
conjunctiveCriterion.setAnd(criterionArray);

ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
conjunctiveCriterionArray.add(conjunctiveCriterion);

Filter filter = new Filter();
filter.setOr(conjunctiveCriterionArray);

BoolQueryBuilder test =
SearchRequestHandler.getFilterQuery(
mockRetrieverContext.withSearchFlags(flags -> flags.setFulltext(false)),
filter,
new HashMap<>(),
QueryFilterRewriteChain.EMPTY);
}

@Test
public void testQueryByDefault() {
final Set<String> COMMON =
Expand Down
Loading

0 comments on commit cf3b08f

Please sign in to comment.