From 38e3052138b9c21de9ebe253f2e94d6ec6a37e74 Mon Sep 17 00:00:00 2001 From: jack-gitdev <126913603+jack-gitdev@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:55:38 -0500 Subject: [PATCH] Bad seek range (#2620) * Updated filtering so index-only fields are omitted from seek ranges related to event queries. * Return deep copies in ParentQueryIterator, AncestorQueryIterator, and TLDQueryIterator --------- Co-authored-by: Ivan Bella Co-authored-by: hgklohr --- .../query/ancestor/AncestorQueryIterator.java | 16 ++++++ .../query/iterator/ParentQueryIterator.java | 16 ++++++ .../query/iterator/QueryIterator.java | 12 +++-- .../datawave/query/iterator/QueryOptions.java | 24 ++++++++- .../visitors/IteratorBuildingVisitor.java | 16 +++++- .../datawave/query/tld/TLDQueryIterator.java | 53 ++++++++++++++----- 6 files changed, 118 insertions(+), 19 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorQueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorQueryIterator.java index 41d01544c58..7528a24e9f6 100644 --- a/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorQueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorQueryIterator.java @@ -148,6 +148,22 @@ public EventDataQueryFilter getEventFilter() { return getEvaluationFilter(); } + @Override + public EventDataQueryFilter getFiEvaluationFilter() { + if (fiEvaluationFilter == null) { + fiEvaluationFilter = getEvaluationFilter(); + } + return fiEvaluationFilter.clone(); + } + + @Override + public EventDataQueryFilter getEventEvaluationFilter() { + if (eventEvaluationFilter == null) { + eventEvaluationFilter = getEvaluationFilter(); + } + return eventEvaluationFilter.clone(); + } + @Override protected JexlEvaluation getJexlEvaluation(NestedQueryIterator documentSource) { return new JexlEvaluation(query, getArithmetic()) { diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/ParentQueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/ParentQueryIterator.java index c0190f18d2d..63a612f7b3e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/ParentQueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/ParentQueryIterator.java @@ -78,6 +78,22 @@ public EventDataQueryFilter getEventFilter() { return getEvaluationFilter(); } + @Override + public EventDataQueryFilter getFiEvaluationFilter() { + if (fiEvaluationFilter == null) { + fiEvaluationFilter = getEvaluationFilter(); + } + return fiEvaluationFilter.clone(); + } + + @Override + public EventDataQueryFilter getEventEvaluationFilter() { + if (eventEvaluationFilter == null) { + eventEvaluationFilter = getEvaluationFilter(); + } + return eventEvaluationFilter.clone(); + } + @Override public Iterator> mapDocument(SortedKeyValueIterator deepSourceCopy, Iterator> documents, CompositeMetadata compositeMetadata) { diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java index 8f101999ffc..6509b471db3 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java @@ -773,7 +773,7 @@ public Entry apply(@Nullable Entry input) { }; } else { // @formatter:off - docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, getEquality(), getEventFilter(), this.includeHierarchyFields, + docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, getEquality(), getEventEvaluationFilter(), this.includeHierarchyFields, this.includeHierarchyFields) .withRangeProvider(getRangeProvider()) .withAggregationThreshold(getDocAggregationThresholdMs()); @@ -790,7 +790,7 @@ public Entry apply(@Nullable Entry input) { // which do not fall within the expected time range Iterator> documents = null; Aggregation a = new Aggregation(this.getTimeFilter(), this.typeMetadataWithNonIndexed, compositeMetadata, this.isIncludeGroupingContext(), - this.includeRecordId, this.disableIndexOnlyDocuments(), getEvaluationFilter(), isTrackSizes()); + this.includeRecordId, this.disableIndexOnlyDocuments(), getEventEvaluationFilter(), isTrackSizes()); if (gatherTimingDetails()) { documents = Iterators.transform(sourceIterator, new EvaluationTrackingFunction<>(QuerySpan.Stage.Aggregation, trackingSpan, a)); } else { @@ -1099,7 +1099,7 @@ protected Iterator> mapDocument(SortedKeyValueIterator> mapDocument(SortedKeyValueIterator> retDocuments = Iterators.transform(mappedDocuments, new TupleToEntry<>()); // Inject the document permutations if required @@ -1428,6 +1428,8 @@ protected IteratorBuildingVisitor createIteratorBuildingVisitor(Class(other.ivaratorCacheDirConfigs); this.hdfsSiteConfigURLs = other.hdfsSiteConfigURLs; @@ -775,7 +779,7 @@ public void setArithmetic(JexlArithmetic arithmetic) { */ public FieldIndexAggregator getFiAggregator() { if (fiAggregator == null) { - this.fiAggregator = new IdentityAggregator(getNonEventFields(), getEvaluationFilter(), getEventNextSeek()); + this.fiAggregator = new IdentityAggregator(getNonEventFields(), getFiEvaluationFilter(), getEventNextSeek()); } return fiAggregator; } @@ -784,10 +788,26 @@ public EventDataQueryFilter getEvaluationFilter() { return evaluationFilter != null ? evaluationFilter.clone() : null; } + public EventDataQueryFilter getFiEvaluationFilter() { + return fiEvaluationFilter != null ? fiEvaluationFilter.clone() : null; + } + + public EventDataQueryFilter getEventEvaluationFilter() { + return eventEvaluationFilter != null ? eventEvaluationFilter.clone() : null; + } + public void setEvaluationFilter(EventDataQueryFilter evaluationFilter) { this.evaluationFilter = evaluationFilter; } + public void setFiEvaluationFilter(EventDataQueryFilter fiEvaluationFilter) { + this.fiEvaluationFilter = fiEvaluationFilter; + } + + public void setEventEvaluationFilter(EventDataQueryFilter eventEvaluationFilter) { + this.eventEvaluationFilter = eventEvaluationFilter; + } + /** * Return or build a field filter IFF this query is projecting results * @@ -1482,6 +1502,8 @@ public boolean validateOptions(Map options) { } this.evaluationFilter = null; + this.fiEvaluationFilter = null; + this.eventEvaluationFilter = null; this.getDocumentKey = GetStartKey.instance(); this.mustUseFieldIndex = false; diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java index 185f0c074a4..e2caf83fa8d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java @@ -162,6 +162,8 @@ public class IteratorBuildingVisitor extends BaseVisitor { protected TypeMetadata typeMetadata; protected EventDataQueryFilter attrFilter; + protected EventDataQueryFilter fiAttrFilter; + protected EventDataQueryFilter eventAttrFilter; protected Set fieldsToAggregate = Collections.emptySet(); protected Set termFrequencyFields = Collections.emptySet(); protected boolean allowTermFrequencyLookup = true; @@ -458,7 +460,9 @@ private NestedIterator buildExceededFromTermFrequency(String identifier, Je builder.setTypeMetadata(typeMetadata); builder.setFieldsToAggregate(fieldsToAggregate); builder.setTimeFilter(timeFilter); - builder.setAttrFilter(attrFilter); + // this code path is only executed in the context of a document range. This optimization scans + // the TF directly instead of the FI. + builder.setAttrFilter(eventAttrFilter); builder.setEnv(env); builder.setTermFrequencyAggregator(getTermFrequencyAggregator(identifier, sourceNode, attrFilter, tfNextSeek)); builder.setNode(rootNode); @@ -1566,6 +1570,16 @@ public IteratorBuildingVisitor setAttrFilter(EventDataQueryFilter attrFilter) { return this; } + public IteratorBuildingVisitor setFiAttrFilter(EventDataQueryFilter fiAttrFilter) { + this.fiAttrFilter = fiAttrFilter; + return this; + } + + public IteratorBuildingVisitor setEventAttrFilter(EventDataQueryFilter eventAttrFilter) { + this.eventAttrFilter = eventAttrFilter; + return this; + } + public IteratorBuildingVisitor setDatatypeFilter(Predicate datatypeFilter) { this.datatypeFilter = datatypeFilter; return this; diff --git a/warehouse/query-core/src/main/java/datawave/query/tld/TLDQueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/tld/TLDQueryIterator.java index 649f05ddc42..a4f45e14d81 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tld/TLDQueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/tld/TLDQueryIterator.java @@ -9,6 +9,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; @@ -102,38 +103,66 @@ public void init(SortedKeyValueIterator source, Map op @Override public FieldIndexAggregator getFiAggregator() { if (fiAggregator == null) { - fiAggregator = new TLDFieldIndexAggregator(getNonEventFields(), getFIEvaluationFilter(), getFiNextSeek()); + fiAggregator = new TLDFieldIndexAggregator(getNonEventFields(), getFiEvaluationFilter(), getFiNextSeek()); } return fiAggregator; } + @Override + public EventDataQueryFilter getEvaluationFilter() { + if (this.evaluationFilter == null && script != null) { + + AttributeFactory attributeFactory = new AttributeFactory(typeMetadata); + Map expressionFilters = getExpressionFilters(script, attributeFactory); + + // setup an evaluation filter to avoid loading every single child key into the event + this.evaluationFilter = new TLDEventDataFilter(script, getAllFields(), expressionFilters, useAllowListedFields ? allowListedFields : null, + useDisallowListedFields ? disallowListedFields : null, getEventFieldSeek(), getEventNextSeek(), + limitFieldsPreQueryEvaluation ? limitFieldsMap : Collections.emptyMap(), limitFieldsField, getNonEventFields()); + } + return this.evaluationFilter != null ? evaluationFilter.clone() : null; + } + /** * Distinct from getEvaluation filter as the FI filter is used to prevent FI hits on nonEventFields that are not indexOnly fields * * @return an {@link EventDataQueryFilter} */ - protected EventDataQueryFilter getFIEvaluationFilter() { - ChainableEventDataQueryFilter filterChain = new ChainableEventDataQueryFilter(); - // primary filter on the current filter - filterChain.addFilter(getEvaluationFilter()); - // prevent anything that is not an index only field from being kept at the tld level, otherwise allow all - filterChain.addFilter(new TLDFieldIndexQueryFilter(getIndexOnlyFields())); - return filterChain; + @Override + public EventDataQueryFilter getFiEvaluationFilter() { + if (fiEvaluationFilter == null && script != null) { + if (QueryIterator.isDocumentSpecificRange(range)) { + // this is to deal with a TF optimization where the TF is scanned instead of the FI in the + // document specific case. + fiEvaluationFilter = getEventEvaluationFilter(); + } else { + fiEvaluationFilter = new TLDFieldIndexQueryFilter(getIndexOnlyFields()); + } + + return fiEvaluationFilter.clone(); + } + return fiEvaluationFilter != null ? fiEvaluationFilter.clone() : null; } @Override - public EventDataQueryFilter getEvaluationFilter() { - if (this.evaluationFilter == null && script != null) { + public EventDataQueryFilter getEventEvaluationFilter() { + if (this.eventEvaluationFilter == null && script != null) { AttributeFactory attributeFactory = new AttributeFactory(typeMetadata); Map expressionFilters = getExpressionFilters(script, attributeFactory); // setup an evaluation filter to avoid loading every single child key into the event - this.evaluationFilter = new TLDEventDataFilter(script, getAllFields(), expressionFilters, useAllowListedFields ? allowListedFields : null, + this.eventEvaluationFilter = new TLDEventDataFilter(script, getEventFields(), expressionFilters, useAllowListedFields ? allowListedFields : null, useDisallowListedFields ? disallowListedFields : null, getEventFieldSeek(), getEventNextSeek(), limitFieldsPreQueryEvaluation ? limitFieldsMap : Collections.emptyMap(), limitFieldsField, getNonEventFields()); } - return this.evaluationFilter != null ? evaluationFilter.clone() : null; + return this.eventEvaluationFilter != null ? eventEvaluationFilter.clone() : null; + } + + public Set getEventFields() { + Set fields = getAllFields(); + fields.removeAll(getIndexOnlyFields()); + return fields; } /**