Skip to content

Commit

Permalink
Bad seek range (#2620)
Browse files Browse the repository at this point in the history
* Updated filtering so index-only fields are omitted from seek ranges related to event queries.

* Return deep copies in ParentQueryIterator, AncestorQueryIterator, and TLDQueryIterator

---------

Co-authored-by: Ivan Bella <[email protected]>
Co-authored-by: hgklohr <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent 86a593e commit 38e3052
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,22 @@ public EventDataQueryFilter getEventFilter() {
return getEvaluationFilter();
}

@Override
public EventDataQueryFilter getFiEvaluationFilter() {
if (fiEvaluationFilter == null) {
fiEvaluationFilter = getEvaluationFilter();
}
return fiEvaluationFilter.clone();
}

@Override
public EventDataQueryFilter getEventEvaluationFilter() {
if (eventEvaluationFilter == null) {
eventEvaluationFilter = getEvaluationFilter();
}
return eventEvaluationFilter.clone();
}

@Override
protected JexlEvaluation getJexlEvaluation(NestedQueryIterator<Key> documentSource) {
return new JexlEvaluation(query, getArithmetic()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@ public EventDataQueryFilter getEventFilter() {
return getEvaluationFilter();
}

@Override
public EventDataQueryFilter getFiEvaluationFilter() {
if (fiEvaluationFilter == null) {
fiEvaluationFilter = getEvaluationFilter();
}
return fiEvaluationFilter.clone();
}

@Override
public EventDataQueryFilter getEventEvaluationFilter() {
if (eventEvaluationFilter == null) {
eventEvaluationFilter = getEvaluationFilter();
}
return eventEvaluationFilter.clone();
}

@Override
public Iterator<Entry<Key,Document>> mapDocument(SortedKeyValueIterator<Key,Value> deepSourceCopy, Iterator<Entry<Key,Document>> documents,
CompositeMetadata compositeMetadata) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ public Entry<DocumentData,Document> apply(@Nullable Entry<Key,Document> input) {
};
} else {
// @formatter:off
docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, getEquality(), getEventFilter(), this.includeHierarchyFields,
docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, getEquality(), getEventEvaluationFilter(), this.includeHierarchyFields,
this.includeHierarchyFields)
.withRangeProvider(getRangeProvider())
.withAggregationThreshold(getDocAggregationThresholdMs());
Expand All @@ -790,7 +790,7 @@ public Entry<DocumentData,Document> apply(@Nullable Entry<Key,Document> input) {
// which do not fall within the expected time range
Iterator<Entry<Key,Document>> documents = null;
Aggregation a = new Aggregation(this.getTimeFilter(), this.typeMetadataWithNonIndexed, compositeMetadata, this.isIncludeGroupingContext(),
this.includeRecordId, this.disableIndexOnlyDocuments(), getEvaluationFilter(), isTrackSizes());
this.includeRecordId, this.disableIndexOnlyDocuments(), getEventEvaluationFilter(), isTrackSizes());
if (gatherTimingDetails()) {
documents = Iterators.transform(sourceIterator, new EvaluationTrackingFunction<>(QuerySpan.Stage.Aggregation, trackingSpan, a));
} else {
Expand Down Expand Up @@ -1099,7 +1099,7 @@ protected Iterator<Entry<Key,Document>> mapDocument(SortedKeyValueIterator<Key,V
if (fieldIndexSatisfiesQuery) {
// @formatter:off
final KeyToDocumentData docMapper = new KeyToDocumentData(deepSourceCopy, this.myEnvironment, this.documentOptions, getEquality(),
getEventFilter(), this.includeHierarchyFields, this.includeHierarchyFields)
getEventEvaluationFilter(), this.includeHierarchyFields, this.includeHierarchyFields)
.withRangeProvider(getRangeProvider())
.withAggregationThreshold(getDocAggregationThresholdMs());
// @formatter:on
Expand All @@ -1108,7 +1108,7 @@ protected Iterator<Entry<Key,Document>> mapDocument(SortedKeyValueIterator<Key,V
new GetDocument(docMapper,
new Aggregation(this.getTimeFilter(), typeMetadataWithNonIndexed, compositeMetadata,
this.isIncludeGroupingContext(), this.includeRecordId, this.disableIndexOnlyDocuments(),
getEvaluationFilter(), isTrackSizes())));
getEventEvaluationFilter(), isTrackSizes())));
Iterator<Entry<Key,Document>> retDocuments = Iterators.transform(mappedDocuments, new TupleToEntry<>());

// Inject the document permutations if required
Expand Down Expand Up @@ -1428,6 +1428,8 @@ protected IteratorBuildingVisitor createIteratorBuildingVisitor(Class<? extends
.setTypeMetadata(this.getTypeMetadata())
.setFieldsToAggregate(this.getNonEventFields())
.setAttrFilter(this.getEvaluationFilter())
.setFiAttrFilter(this.getFiEvaluationFilter())
.setEventAttrFilter(this.getEventEvaluationFilter()) // needed document range TF optimization
.setDatatypeFilter(this.getFieldIndexKeyDataTypeFilter())
.setFiAggregator(this.getFiAggregator())
.setHdfsFileSystem(this.getFileSystemCache())
Expand Down Expand Up @@ -1602,7 +1604,7 @@ protected ActiveQueryLog getActiveQueryLog() {
@Override
public FieldIndexAggregator getFiAggregator() {
if (fiAggregator == null) {
fiAggregator = new IdentityAggregator(getAllIndexOnlyFields(), getEvaluationFilter(), getEventNextSeek());
fiAggregator = new IdentityAggregator(getAllIndexOnlyFields(), getFiEvaluationFilter(), getEventNextSeek());
}
return fiAggregator;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ public class QueryOptions implements OptionDescriber {

// filter for any key type (fi, event, tf)
protected EventDataQueryFilter evaluationFilter;
protected EventDataQueryFilter fiEvaluationFilter;
protected EventDataQueryFilter eventEvaluationFilter;
// filter specifically for event keys. required when performing a seeking aggregation
protected EventDataQueryFilter eventFilter;

Expand Down Expand Up @@ -503,6 +505,8 @@ public void deepCopy(QueryOptions other) {
this.getDocumentKey = other.getDocumentKey;
this.equality = other.equality;
this.evaluationFilter = other.evaluationFilter;
this.fiEvaluationFilter = other.fiEvaluationFilter;
this.eventEvaluationFilter = other.eventEvaluationFilter;

this.ivaratorCacheDirConfigs = (other.ivaratorCacheDirConfigs == null) ? null : new ArrayList<>(other.ivaratorCacheDirConfigs);
this.hdfsSiteConfigURLs = other.hdfsSiteConfigURLs;
Expand Down Expand Up @@ -775,7 +779,7 @@ public void setArithmetic(JexlArithmetic arithmetic) {
*/
public FieldIndexAggregator getFiAggregator() {
if (fiAggregator == null) {
this.fiAggregator = new IdentityAggregator(getNonEventFields(), getEvaluationFilter(), getEventNextSeek());
this.fiAggregator = new IdentityAggregator(getNonEventFields(), getFiEvaluationFilter(), getEventNextSeek());
}
return fiAggregator;
}
Expand All @@ -784,10 +788,26 @@ public EventDataQueryFilter getEvaluationFilter() {
return evaluationFilter != null ? evaluationFilter.clone() : null;
}

public EventDataQueryFilter getFiEvaluationFilter() {
return fiEvaluationFilter != null ? fiEvaluationFilter.clone() : null;
}

public EventDataQueryFilter getEventEvaluationFilter() {
return eventEvaluationFilter != null ? eventEvaluationFilter.clone() : null;
}

public void setEvaluationFilter(EventDataQueryFilter evaluationFilter) {
this.evaluationFilter = evaluationFilter;
}

public void setFiEvaluationFilter(EventDataQueryFilter fiEvaluationFilter) {
this.fiEvaluationFilter = fiEvaluationFilter;
}

public void setEventEvaluationFilter(EventDataQueryFilter eventEvaluationFilter) {
this.eventEvaluationFilter = eventEvaluationFilter;
}

/**
* Return or build a field filter IFF this query is projecting results
*
Expand Down Expand Up @@ -1482,6 +1502,8 @@ public boolean validateOptions(Map<String,String> options) {
}

this.evaluationFilter = null;
this.fiEvaluationFilter = null;
this.eventEvaluationFilter = null;
this.getDocumentKey = GetStartKey.instance();
this.mustUseFieldIndex = false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ public class IteratorBuildingVisitor extends BaseVisitor {

protected TypeMetadata typeMetadata;
protected EventDataQueryFilter attrFilter;
protected EventDataQueryFilter fiAttrFilter;
protected EventDataQueryFilter eventAttrFilter;
protected Set<String> fieldsToAggregate = Collections.emptySet();
protected Set<String> termFrequencyFields = Collections.emptySet();
protected boolean allowTermFrequencyLookup = true;
Expand Down Expand Up @@ -458,7 +460,9 @@ private NestedIterator<Key> buildExceededFromTermFrequency(String identifier, Je
builder.setTypeMetadata(typeMetadata);
builder.setFieldsToAggregate(fieldsToAggregate);
builder.setTimeFilter(timeFilter);
builder.setAttrFilter(attrFilter);
// this code path is only executed in the context of a document range. This optimization scans
// the TF directly instead of the FI.
builder.setAttrFilter(eventAttrFilter);
builder.setEnv(env);
builder.setTermFrequencyAggregator(getTermFrequencyAggregator(identifier, sourceNode, attrFilter, tfNextSeek));
builder.setNode(rootNode);
Expand Down Expand Up @@ -1566,6 +1570,16 @@ public IteratorBuildingVisitor setAttrFilter(EventDataQueryFilter attrFilter) {
return this;
}

public IteratorBuildingVisitor setFiAttrFilter(EventDataQueryFilter fiAttrFilter) {
this.fiAttrFilter = fiAttrFilter;
return this;
}

public IteratorBuildingVisitor setEventAttrFilter(EventDataQueryFilter eventAttrFilter) {
this.eventAttrFilter = eventAttrFilter;
return this;
}

public IteratorBuildingVisitor setDatatypeFilter(Predicate<Key> datatypeFilter) {
this.datatypeFilter = datatypeFilter;
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
Expand Down Expand Up @@ -102,38 +103,66 @@ public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> op
@Override
public FieldIndexAggregator getFiAggregator() {
if (fiAggregator == null) {
fiAggregator = new TLDFieldIndexAggregator(getNonEventFields(), getFIEvaluationFilter(), getFiNextSeek());
fiAggregator = new TLDFieldIndexAggregator(getNonEventFields(), getFiEvaluationFilter(), getFiNextSeek());
}
return fiAggregator;
}

@Override
public EventDataQueryFilter getEvaluationFilter() {
if (this.evaluationFilter == null && script != null) {

AttributeFactory attributeFactory = new AttributeFactory(typeMetadata);
Map<String,ExpressionFilter> expressionFilters = getExpressionFilters(script, attributeFactory);

// setup an evaluation filter to avoid loading every single child key into the event
this.evaluationFilter = new TLDEventDataFilter(script, getAllFields(), expressionFilters, useAllowListedFields ? allowListedFields : null,
useDisallowListedFields ? disallowListedFields : null, getEventFieldSeek(), getEventNextSeek(),
limitFieldsPreQueryEvaluation ? limitFieldsMap : Collections.emptyMap(), limitFieldsField, getNonEventFields());
}
return this.evaluationFilter != null ? evaluationFilter.clone() : null;
}

/**
* Distinct from getEvaluation filter as the FI filter is used to prevent FI hits on nonEventFields that are not indexOnly fields
*
* @return an {@link EventDataQueryFilter}
*/
protected EventDataQueryFilter getFIEvaluationFilter() {
ChainableEventDataQueryFilter filterChain = new ChainableEventDataQueryFilter();
// primary filter on the current filter
filterChain.addFilter(getEvaluationFilter());
// prevent anything that is not an index only field from being kept at the tld level, otherwise allow all
filterChain.addFilter(new TLDFieldIndexQueryFilter(getIndexOnlyFields()));
return filterChain;
@Override
public EventDataQueryFilter getFiEvaluationFilter() {
if (fiEvaluationFilter == null && script != null) {
if (QueryIterator.isDocumentSpecificRange(range)) {
// this is to deal with a TF optimization where the TF is scanned instead of the FI in the
// document specific case.
fiEvaluationFilter = getEventEvaluationFilter();
} else {
fiEvaluationFilter = new TLDFieldIndexQueryFilter(getIndexOnlyFields());
}

return fiEvaluationFilter.clone();
}
return fiEvaluationFilter != null ? fiEvaluationFilter.clone() : null;
}

@Override
public EventDataQueryFilter getEvaluationFilter() {
if (this.evaluationFilter == null && script != null) {
public EventDataQueryFilter getEventEvaluationFilter() {
if (this.eventEvaluationFilter == null && script != null) {

AttributeFactory attributeFactory = new AttributeFactory(typeMetadata);
Map<String,ExpressionFilter> expressionFilters = getExpressionFilters(script, attributeFactory);

// setup an evaluation filter to avoid loading every single child key into the event
this.evaluationFilter = new TLDEventDataFilter(script, getAllFields(), expressionFilters, useAllowListedFields ? allowListedFields : null,
this.eventEvaluationFilter = new TLDEventDataFilter(script, getEventFields(), expressionFilters, useAllowListedFields ? allowListedFields : null,
useDisallowListedFields ? disallowListedFields : null, getEventFieldSeek(), getEventNextSeek(),
limitFieldsPreQueryEvaluation ? limitFieldsMap : Collections.emptyMap(), limitFieldsField, getNonEventFields());
}
return this.evaluationFilter != null ? evaluationFilter.clone() : null;
return this.eventEvaluationFilter != null ? eventEvaluationFilter.clone() : null;
}

public Set<String> getEventFields() {
Set<String> fields = getAllFields();
fields.removeAll(getIndexOnlyFields());
return fields;
}

/**
Expand Down

0 comments on commit 38e3052

Please sign in to comment.