otherCandidates = new LinkedList<>();
+
+ for (int i = 0; i < node.jjtGetNumChildren(); i++) {
+ JexlNode child = node.jjtGetChild(i);
+
+ // this seems expensive, a visitor that returned raw counts, depth, and complexity would be nice to have
+ NodeTypeCount counts = NodeTypeCountVisitor.countNodes(child, ASTERNode.class);
+
+ if (anchorDetectionVisitor.isAnchor(child)) {
+ if (counts.getTotal(ASTERNode.class) > 0) {
+ anchorCandidates.add(child);
+ } else {
+ anchorNonCandidates.add(child);
+ }
+ } else if (counts.getTotal(ASTERNode.class) > 0) {
+ otherCandidates.add(child);
+ }
+ }
+
+ if (!anchorCandidates.isEmpty() || !anchorNonCandidates.isEmpty()) {
+
+ if (!anchorNonCandidates.isEmpty()) {
+ // rewrite all anchor candidates
+ for (JexlNode candidate : anchorCandidates) {
+ candidate.jjtAccept(this, true);
+ }
+ } else {
+ // rewrite all anchor candidates except the last one, to preserve executability
+ for (int i = 0; i < anchorCandidates.size() - 1; i++) {
+ anchorCandidates.get(i).jjtAccept(this, true);
+ }
+ }
+
+ // if any anchor exists, rewrite other candidates
+ for (JexlNode otherCandidate : otherCandidates) {
+ otherCandidate.jjtAccept(this, true);
+ }
+ }
+
+ return data;
+ }
+
+    /**
+     * Rewrites a regex node into a {@code filter:includeRegex} function when the rewrite is legal and is not filtered out by the configured rules.
+     *
+     * @param node
+     *            the regex node
+     * @param data
+     *            handed to the legality check and returned unchanged
+     * @return the data object that was passed in
+     */
+    @Override
+    public Object visit(ASTERNode node, Object data) {
+        String field = JexlASTHelper.getIdentifier(node);
+        if (!isLegalRewrite(field, data)) {
+            return data;
+        }
+
+        // the rewrite is legal, now make sure the configured rules do not filter it out
+        String literal = (String) JexlASTHelper.getLiteralValue(node);
+        if (!isNodeRewritableFromRules(field, literal)) {
+            return data;
+        }
+
+        // swap the regex for the equivalent filter function directly in the parent
+        JexlNode replacement = JexlNodeFactory.buildFunctionNode("filter", "includeRegex", field, literal);
+        JexlNodes.replaceChild(node.jjtGetParent(), node, replacement);
+        return data;
+    }
+
+    /**
+     * Determines whether a regex against the given field may legally be rewritten as a filter function.
+     *
+     * @param field
+     *            the field of the regex term, may be null if no identifier could be resolved
+     * @param data
+     *            the visitor's data object, a Boolean signals that an anchor exists elsewhere in the query
+     * @return true if the rewrite is legal
+     */
+    private boolean isLegalRewrite(String field, Object data) {
+        // a term without a resolvable field cannot be classified, leave it alone instead of failing with a NullPointerException
+        if (field == null) {
+            return false;
+        }
+
+        // never rewrite ANY_FIELD or index-only fields
+        if (Constants.ANY_FIELD.equals(field) || indexOnlyFields.contains(field)) {
+            return false;
+        }
+
+        // the rewrite is legal when either
+        // 1. an anchor exists elsewhere (the caller passed a Boolean as the data object)
+        // 2. the field is not indexed
+        return data instanceof Boolean || !indexedFields.contains(field);
+    }
+
+    /**
+     * Applies the configured rewrite rules to a field and regex literal. Rules are evaluated in priority order: a matching pattern always allows the
+     * rewrite, an excluded field always blocks it, and a non-empty set of include fields restricts rewrites to the listed fields.
+     *
+     * @param field
+     *            the field
+     * @param literal
+     *            the literal
+     * @return true if the node can be rewritten
+     */
+    private boolean isNodeRewritableFromRules(String field, String literal) {
+        // a matching pattern wins outright, regardless of the include/exclude field rules
+        boolean matchedPattern = false;
+        for (RegexRewritePattern candidate : patterns) {
+            if (candidate.matches(field, literal)) {
+                matchedPattern = true;
+                break;
+            }
+        }
+        if (matchedPattern) {
+            return true;
+        }
+
+        // an excluded field is never rewritten, even when it is also listed as an include field
+        boolean excluded = !excludeFields.isEmpty() && excludeFields.contains(field);
+        if (excluded) {
+            return false;
+        }
+
+        // with no include fields configured every remaining field is rewritable, otherwise the field must be listed
+        return includeFields.isEmpty() || includeFields.contains(field);
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
new file mode 100644
index 00000000000..018658b1eb2
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
@@ -0,0 +1,208 @@
+package datawave.query.jexl.visitors.pushdown;
+
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTAndNode;
+import org.apache.commons.jexl3.parser.ASTAssignment;
+import org.apache.commons.jexl3.parser.ASTEQNode;
+import org.apache.commons.jexl3.parser.ASTERNode;
+import org.apache.commons.jexl3.parser.ASTFunctionNode;
+import org.apache.commons.jexl3.parser.ASTGENode;
+import org.apache.commons.jexl3.parser.ASTGTNode;
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.ASTLENode;
+import org.apache.commons.jexl3.parser.ASTLTNode;
+import org.apache.commons.jexl3.parser.ASTNENode;
+import org.apache.commons.jexl3.parser.ASTNRNode;
+import org.apache.commons.jexl3.parser.ASTNotNode;
+import org.apache.commons.jexl3.parser.ASTOrNode;
+import org.apache.commons.jexl3.parser.ASTReference;
+import org.apache.commons.jexl3.parser.ASTReferenceExpression;
+import org.apache.commons.jexl3.parser.JexlNode;
+
+import datawave.query.jexl.JexlASTHelper;
+import datawave.query.jexl.nodes.QueryPropertyMarker;
+import datawave.query.jexl.visitors.ShortCircuitBaseVisitor;
+
+/**
+ * Determines if a subtree is an anchor for a given query
+ *
+ * An anchor is defined as an executable leaf or subtree. Every visit method answers with a boolean: true if the visited subtree is an anchor.
+ */
+public class AnchorDetectionVisitor extends ShortCircuitBaseVisitor {
+
+    private final Set indexedFields;
+    private final Set indexOnlyFields;
+
+    /**
+     * Default constructor
+     *
+     * @param indexedFields
+     *            the set of indexed query fields
+     * @param indexOnlyFields
+     *            the set of index only query fields
+     */
+    public AnchorDetectionVisitor(Set indexedFields, Set indexOnlyFields) {
+        this.indexedFields = indexedFields;
+        this.indexOnlyFields = indexOnlyFields;
+    }
+
+    /**
+     * Entry point, visits the node with a null data object and unboxes the answer.
+     *
+     * @param node
+     *            the root of the subtree to check
+     * @return true if the subtree is an anchor
+     */
+    public boolean isAnchor(JexlNode node) {
+        Object verdict = node.jjtAccept(this, null);
+        return (boolean) verdict;
+    }
+
+    // pass through nodes, the answer of the first child is the answer for the node
+
+    @Override
+    public Object visit(ASTJexlScript node, Object data) {
+        return acceptFirstChild(node, data);
+    }
+
+    @Override
+    public Object visit(ASTReference node, Object data) {
+        return acceptFirstChild(node, data);
+    }
+
+    @Override
+    public Object visit(ASTReferenceExpression node, Object data) {
+        return acceptFirstChild(node, data);
+    }
+
+    /**
+     * Visits the first child of a wrapper node with this visitor
+     *
+     * @param node
+     *            a wrapper node
+     * @param data
+     *            an Object
+     * @return whatever the first child returns
+     */
+    private Object acceptFirstChild(JexlNode node, Object data) {
+        return node.jjtGetChild(0).jjtAccept(this, data);
+    }
+
+    // assignments and negations are never anchors
+
+    @Override
+    public Object visit(ASTAssignment node, Object data) {
+        return false;
+    }
+
+    @Override
+    public Object visit(ASTNotNode node, Object data) {
+        return false;
+    }
+
+    // junction nodes
+
+    /**
+     * An OrNode is considered an anchor if and only if all children are anchor nodes
+     *
+     * @param node
+     *            a JexlNode
+     * @param data
+     *            an Object
+     * @return True if this node is an anchor
+     */
+    @Override
+    public Object visit(ASTOrNode node, Object data) {
+        for (int idx = 0; idx < node.jjtGetNumChildren(); idx++) {
+            Object childVerdict = node.jjtGetChild(idx).jjtAccept(this, data);
+            if (!((boolean) childVerdict)) {
+                // a single non-anchor child is enough to disqualify the union
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * An AndNode is considered an anchor if at least one child node is an anchor. When the node is a query property marker the marker type alone decides
+     * and the children are not visited.
+     *
+     * @param node
+     *            a JexlNode
+     * @param data
+     *            an Object
+     * @return True if this node is an anchor
+     */
+    @Override
+    public Object visit(ASTAndNode node, Object data) {
+        QueryPropertyMarker.Instance marker = QueryPropertyMarker.findInstance(node);
+        if (marker.isAnyType()) {
+            return visitMarker(marker);
+        }
+
+        for (int idx = 0; idx < node.jjtGetNumChildren(); idx++) {
+            Object childVerdict = node.jjtGetChild(idx).jjtAccept(this, data);
+            if ((boolean) childVerdict) {
+                // one anchor child is enough for the whole intersection
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // leaf nodes
+    // NOTE(review): NE and NR leaves can be reported as anchors while an ASTNotNode never is -- confirm negated leaves are meant to anchor a query
+
+    @Override
+    public Object visit(ASTEQNode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTNENode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTLTNode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTGTNode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTLENode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTGENode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTERNode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    @Override
+    public Object visit(ASTNRNode node, Object data) {
+        return visitLeaf(node);
+    }
+
+    // functions are never anchors
+
+    @Override
+    public Object visit(ASTFunctionNode node, Object data) {
+        return false;
+    }
+
+    /**
+     * A leaf is an anchor when its field is indexed or index only. EQ and NE leaves additionally require a non-null literal value.
+     *
+     * @param node
+     *            a leaf node
+     * @return true if the leaf is an anchor
+     */
+    private boolean visitLeaf(JexlNode node) {
+        String field = JexlASTHelper.getIdentifier(node, true);
+
+        boolean indexed = indexedFields.contains(field) || indexOnlyFields.contains(field);
+        if (!indexed) {
+            return false;
+        }
+
+        boolean equalityLeaf = node instanceof ASTEQNode || node instanceof ASTNENode;
+        if (!equalityLeaf) {
+            return true;
+        }
+
+        // an equality against a null literal is not an anchor
+        return JexlASTHelper.getLiteralValue(node) != null;
+    }
+
+    /**
+     * Decides whether a marked subtree is an anchor based on the marker type alone
+     *
+     * @param instance
+     *            the marker instance
+     * @return true for the marker types listed in the switch, false for a missing instance, a missing type or any other type
+     */
+    private Object visitMarker(QueryPropertyMarker.Instance instance) {
+
+        // NOTE(review): the only visible caller dereferences the instance before calling, so the null check on the instance looks redundant
+        if (instance == null || instance.getType() == null) {
+            return false;
+        }
+
+        // might need to handle double markers, such as delayed bounded ranges
+
+        boolean anchor;
+        switch (instance.getType()) {
+            case BOUNDED_RANGE:
+            case DELAYED:
+            case EVALUATION_ONLY:
+            case EXCEEDED_OR:
+            case EXCEEDED_TERM:
+            case EXCEEDED_VALUE:
+                anchor = true;
+                break;
+            default:
+                anchor = false;
+                break;
+        }
+        return anchor;
+    }
+
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index 6df09d7646c..aaf34561c9f 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -142,8 +142,10 @@
import datawave.query.jexl.visitors.RebuildingVisitor;
import datawave.query.jexl.visitors.RegexFunctionVisitor;
import datawave.query.jexl.visitors.RegexIndexExpansionVisitor;
+import datawave.query.jexl.visitors.RegexRewritePattern;
import datawave.query.jexl.visitors.RewriteNegationsVisitor;
import datawave.query.jexl.visitors.RewriteNullFunctionsVisitor;
+import datawave.query.jexl.visitors.RewriteRegexVisitor;
import datawave.query.jexl.visitors.SetMembershipVisitor;
import datawave.query.jexl.visitors.SortedUIDsRequiredVisitor;
import datawave.query.jexl.visitors.TermCountingVisitor;
@@ -304,6 +306,14 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
*/
protected boolean showReducedQueryPrune = true;
+ /**
+ * Controls optimistic rewriting of regex terms as filter functions, preserving overall query executability
+ */
+ protected boolean rewriteRegexTerms = false;
+ protected Set regexIncludeFields;
+ protected Set regexExcludeFields;
+ protected Set regexRewritePatterns;
+
// handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating)
private TimedVisitorManager visitorManager = new TimedVisitorManager();
@@ -778,7 +788,10 @@ protected ASTJexlScript updateQueryTree(ScannerFactory scannerFactory, MetadataH
// | Post Query Model Expansion Clean Up |
// +-------------------------------------+
- Set indexOnlyFields = loadIndexedFields(config);
+ Set indexOnlyFields = loadIndexOnlyFields(config);
+
+ Set indexedFields = loadIndexedFields(config);
+ config.setIndexedFields(indexedFields);
if (!indexOnlyFields.isEmpty()) {
// filter:includeRegex and filter:excludeRegex functions cannot be run against index-only fields, clean that up
@@ -809,6 +822,11 @@ protected ASTJexlScript updateQueryTree(ScannerFactory scannerFactory, MetadataH
config.setQueryTree(timedEnforceUniqueDisjunctionsWithinExpressions(timers, config.getQueryTree()));
}
+ // rewrite regex nodes, optimistically
+ if (rewriteRegexTerms) {
+ RewriteRegexVisitor.rewrite(config.getQueryTree(), indexedFields, indexOnlyFields, regexIncludeFields, regexExcludeFields, regexRewritePatterns);
+ }
+
if (disableBoundedLookup) {
// protection mechanism. If we disable bounded ranges and have a
// LT,GT or ER node, we should expand it
@@ -1219,7 +1237,7 @@ protected QueryModel loadQueryModel(ShardQueryConfiguration config) {
*/
- protected Set loadIndexedFields(ShardQueryConfiguration config) {
+ protected Set loadIndexOnlyFields(ShardQueryConfiguration config) {
try {
return metadataHelper.getIndexOnlyFields(config.getDatatypeFilter());
} catch (TableNotFoundException e) {
@@ -1228,6 +1246,15 @@ protected Set loadIndexedFields(ShardQueryConfiguration config) {
}
}
+    /**
+     * Loads the indexed fields for the datatype filter of the given configuration. If the metadata table cannot be found the error is rethrown as a
+     * {@link DatawaveFatalQueryException}
+     *
+     * @param config
+     *            the query configuration supplying the datatype filter
+     * @return the indexed fields reported by the metadata helper
+     */
+    protected Set loadIndexedFields(ShardQueryConfiguration config) {
+        try {
+            return metadataHelper.getIndexedFields(config.getDatatypeFilter());
+        } catch (TableNotFoundException tableNotFound) {
+            // NOTE(review): this reuses the index-only error code although indexed fields are being loaded -- confirm whether a dedicated code should be used
+            throw new DatawaveFatalQueryException(new QueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_RETRIEVAL_ERROR, tableNotFound));
+        }
+    }
+
/**
* Loads expansion fields filtered by datatype. If an error occurs that error is rethrown as a {@link DatawaveFatalQueryException}
*
@@ -3215,6 +3242,38 @@ public static void setMaxTermsToPrint(int maxTermsToPrint) {
DefaultQueryPlanner.maxTermsToPrint = maxTermsToPrint;
}
+ /**
+ * @return true if regex terms are optimistically rewritten as filter functions while the query tree is updated
+ */
+ public boolean isRewriteRegexTerms() {
+ return rewriteRegexTerms;
+ }
+
+ /**
+ * @param rewriteRegexTerms
+ * true to enable the optimistic rewriting of regex terms as filter functions
+ */
+ public void setRewriteRegexTerms(boolean rewriteRegexTerms) {
+ this.rewriteRegexTerms = rewriteRegexTerms;
+ }
+
+ /**
+ * @return the include fields handed to the RewriteRegexVisitor when regex rewriting is enabled, may be null if never set
+ */
+ public Set getRegexIncludeFields() {
+ return regexIncludeFields;
+ }
+
+ /**
+ * @param regexIncludeFields
+ * the include fields handed to the RewriteRegexVisitor when regex rewriting is enabled
+ */
+ public void setRegexIncludeFields(Set regexIncludeFields) {
+ this.regexIncludeFields = regexIncludeFields;
+ }
+
+ /**
+ * @return the exclude fields handed to the RewriteRegexVisitor when regex rewriting is enabled, may be null if never set
+ */
+ public Set getRegexExcludeFields() {
+ return regexExcludeFields;
+ }
+
+ /**
+ * @param regexExcludeFields
+ * the exclude fields handed to the RewriteRegexVisitor when regex rewriting is enabled
+ */
+ public void setRegexExcludeFields(Set regexExcludeFields) {
+ this.regexExcludeFields = regexExcludeFields;
+ }
+
+ /**
+ * @return the rewrite patterns handed to the RewriteRegexVisitor when regex rewriting is enabled, may be null if never set
+ */
+ public Set getRegexRewritePatterns() {
+ return regexRewritePatterns;
+ }
+
+ /**
+ * @param regexRewritePatterns
+ * the rewrite patterns handed to the RewriteRegexVisitor when regex rewriting is enabled
+ */
+ public void setRegexRewritePatterns(Set regexRewritePatterns) {
+ this.regexRewritePatterns = regexRewritePatterns;
+ }
+
/**
* Given a date, truncate it to year, month, date and increment the day by one to determine the following day.
*
diff --git a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
index 8149117800d..c51fe8154a6 100644
--- a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
@@ -888,4 +888,27 @@ public void testSortQueryBeforeGlobalIndex() throws Exception {
}
}
+ @Test
+ public void testRewriteRegexFromIncludes() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && TYPE =~ 'reg.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(TYPE, 'reg.*')");
+ }
+
+ @Test
+ public void testDoNotRewriteRegexWithExcludedField() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hex.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Delayed_ = true) && (SHAPE =~ 'hex.*'))");
+ }
+
+ @Test
+ public void testRewriteRegexWithExcludedFieldBecauseOfPatternMatch() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hexag.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(SHAPE, 'hexag.*')");
+ }
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
new file mode 100644
index 00000000000..fd6add4b908
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
@@ -0,0 +1,442 @@
+package datawave.query.jexl.visitors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.ParseException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+
+public class RewriteRegexVisitorTest {
+
+ private final Set indexedFields = Set.of("F", "F2", "IO", "IO2");
+ private final Set indexOnlyFields = Set.of("IO", "IO2");
+
+ private final Set includeFields = new HashSet<>();
+ private final Set excludeFields = new HashSet<>();
+
+ private final Set patterns = new HashSet<>();
+
+ // empties the shared include fields, exclude fields and patterns before every test so rules do not leak between tests
+ @BeforeEach
+ public void beforeEach() {
+ includeFields.clear();
+ excludeFields.clear();
+ patterns.clear();
+ }
+
+ // A and regex
+ // F and IO are indexed (IO is also index only), NA is not indexed
+ // presumably a single-argument test(...) call expects the query to come back unchanged -- the helper is not visible here
+ @Test
+ public void testSingleTermAndRegex() {
+ // term and indexed regex, rewritten only when the other term is indexed
+ test("F == 'a' && F =~ 'ba.*'", "F == 'a' && filter:includeRegex(F, 'ba.*')");
+ test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && filter:includeRegex(F, 'ba.*')");
+ test("NA == 'a' && F =~ 'ba.*'");
+
+ // term and index only regex is never rewritten
+ test("F == 'a' && IO =~ 'ba.*'");
+ test("IO == 'a' && IO =~ 'ba.*'");
+ test("NA == 'a' && IO =~ 'ba.*'");
+
+ // term and non-indexed regex is always rewritten
+ test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && filter:includeRegex(NA, 'ba.*')");
+ test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && filter:includeRegex(NA, 'ba.*')");
+ test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && filter:includeRegex(NA, 'ba.*')");
+ }
+
+ // A or regex
+ @Test
+ public void testSingleTermOrRegex() {
+ // term or indexed regex is never rewritten
+ test("F == 'a' || F =~ 'ba.*'");
+ test("IO == 'a' || F =~ 'ba.*'");
+ test("NA == 'a' || F =~ 'ba.*'");
+
+ // term or index only regex is never rewritten
+ test("F == 'a' || IO =~ 'ba.*'");
+ test("IO == 'a' || IO =~ 'ba.*'");
+ test("NA == 'a' || IO =~ 'ba.*'");
+
+ // top level union with non-indexed regex is a full table scan, the non-indexed regex is still rewritten
+ test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || filter:includeRegex(NA, 'ba.*')");
+ test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || filter:includeRegex(NA, 'ba.*')");
+ test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || filter:includeRegex(NA, 'ba.*')");
+ }
+
+ // (A and B) or regex
+ @Test
+ public void testNestedIntersectionOrRegex() {
+ // all combinations of nested intersection and indexed regex, never rewritten
+ test("(F == 'a' && F == 'b') || F =~ 'ba.*'");
+ test("(F == 'a' && IO == 'b') || F =~ 'ba.*'");
+ test("(F == 'a' && NA == 'b') || F =~ 'ba.*'");
+ test("(IO == 'a' && IO == 'b') || F =~ 'ba.*'");
+ test("(IO == 'a' && NA == 'b') || F =~ 'ba.*'");
+ test("(NA == 'a' && NA == 'b') || F =~ 'ba.*'");
+
+ // all combinations of nested intersection and index only regex, never rewritten
+ test("(F == 'a' && F == 'b') || IO =~ 'ba.*'");
+ test("(F == 'a' && IO == 'b') || IO =~ 'ba.*'");
+ test("(F == 'a' && NA == 'b') || IO =~ 'ba.*'");
+ test("(IO == 'a' && IO == 'b') || IO =~ 'ba.*'");
+ test("(IO == 'a' && NA == 'b') || IO =~ 'ba.*'");
+ test("(NA == 'a' && NA == 'b') || IO =~ 'ba.*'");
+
+ // the input queries are non-executable, non-indexed field still gets rewritten
+ // all combinations of nested intersection and non-indexed regex
+ test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || filter:includeRegex(NA, 'ba.*')");
+ test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')");
+ test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')");
+ test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')");
+ test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')");
+ // NOTE(review): the regex field below is spelled 'Na' rather than 'NA', it is still a non-indexed field -- confirm this is not a typo
+ test("(NA == 'a' && NA == 'b') || Na =~ 'ba.*'", "(NA == 'a' && NA == 'b') || filter:includeRegex(Na, 'ba.*')");
+ }
+
+ // (A or B) and regex
+ @Test
+ public void testNestedUnionAndRegex() {
+ // all combinations of nested union and indexed regex, rewritten only when every term of the union is indexed
+ test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(F, 'ba.*')");
+ test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')");
+ test("(F == 'a' || NA == 'b') && F =~ 'ba.*'");
+ test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')");
+ test("(IO == 'a' || NA == 'b') && F =~ 'ba.*'");
+ test("(NA == 'a' || NA == 'b') && F =~ 'ba.*'");
+
+ // all combinations of nested union and index only regex, never rewritten
+ test("(F == 'a' || F == 'b') && IO =~ 'ba.*'");
+ test("(F == 'a' || IO == 'b') && IO =~ 'ba.*'");
+ test("(F == 'a' || NA == 'b') && IO =~ 'ba.*'");
+ test("(IO == 'a' || IO == 'b') && IO =~ 'ba.*'");
+ test("(IO == 'a' || NA == 'b') && IO =~ 'ba.*'");
+ test("(NA == 'a' || NA == 'b') && IO =~ 'ba.*'");
+
+ // all combinations of nested union and non-indexed regex, always rewritten
+ test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(NA, 'ba.*')");
+ test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')");
+ test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')");
+ test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')");
+ test("(IO == 'a' || NA == 'b') && NA =~ 'ba.*'", "(IO == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')");
+ // NOTE(review): the regex field below is spelled 'Na' rather than 'NA', it is still a non-indexed field -- confirm this is not a typo
+ test("(NA == 'a' || NA == 'b') && Na =~ 'ba.*'", "(NA == 'a' || NA == 'b') && filter:includeRegex(Na, 'ba.*')");
+ }
+
+ // A and (B or regex)
+ // the regex sits inside a nested union, the top level term decides whether an indexed regex may be rewritten
+ @Test
+ public void testIntersectionWithNestedUnionWithSingleRegex() {
+ // top level indexed term, variable indexed state for nested term, indexed regex
+ test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))");
+
+ // top level indexed term, variable indexed state for nested term, index only regex
+ test("F == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("F == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("F == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level indexed term, variable indexed state for nested term, non-indexed regex
+ test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+
+ // top level index only term, variable indexed state for nested term, indexed regex
+ test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))");
+
+ // top level index only term, variable indexed state for nested term, index only regex
+ test("IO == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("IO == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("IO == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level index only term, variable indexed state for nested term, non-indexed regex
+ test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+
+ // top level non-indexed term, variable indexed state for nested term, indexed regex
+ test("NA == 'a' && (F == 'b' || F =~ 'ba.*')");
+ test("NA == 'a' && (IO == 'b' || F =~ 'ba.*')");
+ test("NA == 'a' && (NA == 'b' || F =~ 'ba.*')");
+
+ // top level non-indexed term, variable indexed state for nested term, index only regex
+ test("NA == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("NA == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("NA == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level non-indexed term, variable indexed state for nested term, non-indexed regex
+ test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ }
+
+ // A or (B and regex)
+ // NOTE(review): every nested "regex" term in this test uses '==' instead of '=~', so the queries contain no regex node and nothing can be
+ // rewritten -- the cases likely need '=~' together with updated expectations, confirm against the visitor
+ @Test
+ public void testUnionWithNestedIntersectionWithSingleRegex() {
+ // top level indexed, variable index state of nested term, indexed regex
+ test("F == 'a' || (F == 'b' && F == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level indexed, variable index state of nested term, index only regex
+ test("F == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level indexed, variable index state of nested term, non-indexed regex
+ test("F == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && NA == 'ab.*')");
+
+ // top level index only, variable index state of nested term, indexed regex
+ test("IO == 'a' || (F == 'b' && F == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level index only, variable index state of nested term, index only regex
+ test("IO == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level index only, variable index state of nested term, non-indexed regex
+ test("IO == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && NA == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, indexed regex
+ test("NA == 'a' || (F == 'b' && F == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, index only regex
+ test("NA == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, non-indexed regex
+ test("NA == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && NA == 'ab.*')");
+ }
+
+ // A and (regex or regex)
+ @Test
+ public void testIntersectionWithNestedUnionOfRegexes() {
+ // indexed term and union of regexes with all possible index states
+ test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))");
+ test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')");
+ test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+
+ // index only term and union of regexes with all possible index states
+ test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))");
+ test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')");
+ test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+
+ // non-indexed term and union of regexes with all possible index states, only non-indexed regexes are rewritten
+ test("NA == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')");
+ test("NA == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')");
+ test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ }
+
+ // A or (regex and regex)
+ // the expectations are the same for every top level term, the nested intersection alone decides what is rewritten
+ @Test
+ public void testUnionWithNestedIntersectionOfRegexes() {
+ // indexed term or intersection of regexes with all possible index states
+ test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+
+ // index only term or intersection of regexes with all possible index states
+ test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+
+ // non-indexed term or intersection of regexes with all possible index states
+ test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+ }
+
+    // (A or regex) and (B or regex)
+    @Test
+    public void testNestedUnionsWithDistributedRegexes() {
+        // both unions hold an indexed regex, only the regex in the first union is rewritten
+        test("(F == 'a' || F =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')", "(F == 'a' || filter:includeRegex(F, 'ab.*')) && (F == 'b' || F =~ 'ac.*')");
+
+        // the first union holds a non-indexed regex which is rewritten, the second union is left alone
+        test("(F == 'a' || NA =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')", "(F == 'a' || filter:includeRegex(NA, 'ab.*')) && (F == 'b' || F =~ 'ac.*')");
+    }
+
+    // (A and regex) or (B and regex)
+    // each intersection has its own indexed term, so both regexes are rewritten
+    @Test
+    public void testNestedIntersectionsWithDistributedRegexes() {
+        test("(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')",
+                        "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))");
+    }
+
+    // (A or B) and (regex or regex)
+    // the union of indexed terms allows both regexes in the other union to be rewritten
+    @Test
+    public void testPartialAnchorAndNestedUnionRegex() {
+        test("(F == 'a' || F == 'b') && (F =~ 'ab.*' || F =~ 'ac.*')",
+                        "(F == 'a' || F == 'b') && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))");
+    }
+
+    // A and (B or (C and regex))
+    // the regex two levels down is rewritten
+    @Test
+    public void testLeftAnchorAndDeeplyNestedRegex() {
+        test("F == 'a' && (F == 'b' || (F == 'c' && F =~ 'ab.*'))", "F == 'a' && (F == 'b' || (F == 'c' && filter:includeRegex(F, 'ab.*')))");
+    }
+
+ // ((regex and C) or B) and A
+ @Test
+ public void testRightAnchorAndDeeplyNestedRegex() {
+ String query = "((F =~ 'ab.*' && F == 'c') || F == 'b') && F == 'a'";
+ String expected = "((filter:includeRegex(F, 'ab.*') && F == 'c') || F == 'b') && F == 'a'";
+ test(query, expected);
+ }
+
+ @Test
+ public void testUnionOfTwoLegalRewrites() {
+ String query = "(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')";
+ String expected = "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))";
+ test(query, expected);
+ }
+
+ // (NA and regex) or (NA and regex)
+ @Test
+ public void testUnionOfTwoIllegalRewrites() {
+ String query = "(NA == 'a' && F =~ 'ab.*') || (NA == 'b' && F =~ 'ac.*')";
+ test(query);
+ }
+
+ @Test
+ public void testIncludeFieldsPreventNoRewrites() {
+ withIncludeFields(Set.of("F", "F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && filter:includeRegex(F2, 'ac.*')");
+ }
+
+ @Test
+ public void testIncludeFieldsPreventSomeLegalRewrites() {
+ withIncludeFields(Set.of("F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && filter:includeRegex(F2, 'ac.*')");
+ }
+
+ @Test
+ public void testExcludeFieldsPreventAllLegalRewrites() {
+ withExcludeFields(Set.of("F", "F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'");
+ }
+
+ @Test
+ public void testExcludeFieldsPreventSomeLegalRewrites() {
+ withExcludeFields(Set.of("F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && F2 =~ 'ac.*'");
+ }
+
+ @Test
+ public void testFullyInclusiveIncludeAndExcludeFields() {
+ withIncludeFields(Set.of("F"));
+ withExcludeFields(Set.of("F"));
+ // exclude fields beats include fields
+ test("IO == 'a' && F =~ 'ab.*'");
+ }
+
+ @Test
+ public void testPatternBeatsExcludeFields() {
+ withPattern("F", "zz.*");
+ withExcludeFields(Set.of("F"));
+ // pattern beats exclude fields
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ }
+
+ @Test
+ public void testPatternBeatsIncludeFields() {
+ withPattern("F", "zz.*");
+ withIncludeFields(Set.of("F2"));
+ // pattern beats include fields
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ }
+
+ @Test
+ public void testPatternBeatsIncludeAndExcludeFields() {
+ withPattern("F", "zz.*");
+ withIncludeFields(Set.of("F2"));
+ withExcludeFields(Set.of("F"));
+ // pattern beats both the include fields (F is not listed) and the exclude fields (F is listed)
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ }
+
+ /**
+ * Assert that the provided query does not change
+ *
+ * @param query
+ * the query
+ */
+ private void test(String query) {
+ test(query, query);
+ }
+
+ /**
+ * Assert that the provided query matches the expected query after the {@link RewriteRegexVisitor} is applied
+ *
+ * @param query
+ * the query
+ * @param expected
+ * the expected result
+ */
+ private void test(String query, String expected) {
+ ASTJexlScript script = parse(query);
+ RewriteRegexVisitor.rewrite(script, indexedFields, indexOnlyFields, includeFields, excludeFields, patterns);
+ String result = JexlStringBuildingVisitor.buildQuery(script);
+ assertEquals(expected, result);
+ }
+
+ private ASTJexlScript parse(String query) {
+ try {
+ return JexlASTHelper.parseAndFlattenJexlQuery(query);
+ } catch (ParseException e) {
+ fail("Failed to parse query: " + query, e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void withIncludeFields(Set<String> includeFields) {
+ this.includeFields.addAll(includeFields); // adds to the test's include set; repeated calls accumulate
+ }
+
+ private void withExcludeFields(Set<String> excludeFields) {
+ this.excludeFields.addAll(excludeFields); // adds to the test's exclude set; repeated calls accumulate
+ }
+
+ private void withPattern(String field, String literal) {
+ patterns.add(new RegexRewritePattern(field, literal));
+ }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
new file mode 100644
index 00000000000..6b63061f7a3
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
@@ -0,0 +1,274 @@
+package datawave.query.jexl.visitors.pushdown;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.JexlNode;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+
+class AnchorDetectionVisitorTest {
+
+ private final Set<String> indexOnlyFields = Collections.singleton("IO"); // the only index-only field used by these tests
+ private final Set<String> indexedFields = Collections.singleton("F"); // the only indexed field used by these tests
+ private AnchorDetectionVisitor visitor;
+
+ @Test
+ void testIndexedLeaves() {
+ // @formatter:off
+ String[] queries = new String[]{
+ "F == '1'",
+ "F != '1'",
+ "F < '2'",
+ "F > '2'",
+ "F <= '2'",
+ "F >= '2'",
+ "F =~ 'ba.*'",
+ "F !~ 'ba.*'",
+ };
+ // @formatter:on
+
+ test(queries, true);
+ }
+
+ @Test
+ void testIndexOnlyLeaves() {
+ // @formatter:off
+ String[] queries = new String[]{
+ "IO == '1'",
+ "IO != '1'",
+ "IO < '2'",
+ "IO > '2'",
+ "IO <= '2'",
+ "IO >= '2'",
+ "IO =~ 'ba.*'",
+ "IO !~ 'ba.*'",
+ };
+ // @formatter:on
+
+ test(queries, true);
+ }
+
+ @Test
+ void testNonIndexedLeaves() {
+ // @formatter:off
+ String[] queries = new String[]{
+ "FIELD == '1'",
+ "FIELD != '1'",
+ "FIELD < '2'",
+ "FIELD > '2'",
+ "FIELD <= '2'",
+ "FIELD >= '2'",
+ "FIELD =~ 'ba.*'",
+ "FIELD !~ 'ba.*'",
+ };
+ // @formatter:on
+
+ test(queries, false);
+ }
+
+ @Test
+ void testNullLiterals() {
+ test("F == null", false);
+ test("F != null", false);
+ test("IO == null", false);
+ test("IO != null", false);
+ test("FIELD == null", false);
+ test("FIELD != null", false);
+ }
+
+ @Test
+ void testFilterFunctions() {
+ // @formatter:off
+ String[] queries = new String[]{
+ // include/exclude on the indexed field (F) and a non-indexed field (FIELD); NOTE(review): the original comment spoke of index-only fields, but no IO variant is listed - confirm
+ "filter:include(F, 'ba.*')",
+ "filter:exclude(F, 'ba.*')",
+ "filter:include(FIELD, 'ba.*')",
+ "filter:exclude(FIELD, 'ba.*')",
+ // isNull functions should be rewritten to 'F == null'; NOTE(review): each query below is listed twice - possibly an IO variant was intended, confirm
+ "filter:isNull(F)",
+ "filter:isNull(F)",
+ "filter:isNull(FIELD)",
+ "filter:isNull(FIELD)",
+ // isNotNull functions should be rewritten to !(F == null); NOTE(review): each query below is listed twice - possibly an IO variant was intended, confirm
+ "filter:isNotNull(F)",
+ "filter:isNotNull(F)",
+ "filter:isNotNull(FIELD)",
+ "filter:isNotNull(FIELD)",
+ "filter:compare(F,'==','any',F)",
+ "filter:compare(IO,'==','any',IO)",
+ "filter:compare(FIELD,'==','any',FIELD)",
+ };
+ // @formatter:on
+
+ test(queries, false);
+ }
+
+ @Test
+ void testMarkers() {
+ // @formatter:off
+ String[] anchorMarkers = new String[] {
+ "((_Bounded_ = true) && (F > '2' && F < '5'))",
+ "((_Delayed_ = true) && (F == '1'))",
+ "((_Eval_ = true) && (F == '1'))",
+ "((_List_ = true) && ((id = 'id') && (field = 'F') && (params = '{\"ranges\":[[\"[r1\",\"r2]\"],[\"[r3\",\"f4]\"]]}')))",
+ "((_Value_ = true) && (F =~ 'ba.*'))",
+ "((_Term_ = true) && (_ANYFIELD_ =~ 'ba.*'))"
+ };
+ // @formatter:on
+
+ test(anchorMarkers, true);
+
+ // @formatter:off
+ String[] nonAnchorMarkers = new String[]{
+ "((_Hole_ = true) && (F == '1'))",
+ "((_Drop_ = true) && (F == '1'))",
+ "((_Lenient_ = true) && (F == '1'))",
+ "((_Strict_ = true) && (F == '1'))"
+ };
+ // @formatter:on
+
+ test(nonAnchorMarkers, false);
+ }
+
+ @Test
+ void testUnions() {
+ // @formatter:off
+ String[] anchorUnions = new String[] {
+ "F == '1' || F == '2'",
+ "F == '1' || IO == '1'",
+ "IO == '1' || IO == '2'"};
+ // @formatter:on
+
+ test(anchorUnions, true);
+
+ // @formatter:off
+ String[] nonAnchorUnions = new String[] {
+ "FIELD == '1' || F == '2'",
+ "F == '1' || IO == '1' || FIELD == '3'",
+ "FIELD == '1' || FIELD == '2'"};
+ // @formatter:on
+
+ test(nonAnchorUnions, false);
+ }
+
+ @Test
+ void testIntersections() {
+ // @formatter:off
+ String[] anchorIntersections = new String[] {
+ "F == '1' && F == '2'",
+ "F == '1' && IO == '1'",
+ "IO == '1' && IO == '2'",
+ "F == '1' && IO == null",
+ "IO == '1' && IO == null",
+ // intersection needs just one anchor to be executable
+ "X == '1' && F == '2'", "X == '1' && IO == '2'"
+ };
+ // @formatter:on
+
+ test(anchorIntersections, true);
+
+ // @formatter:off
+ String[] nonAnchorQueries = new String[] {
+ "X == '1' && Y == '2' && Z == '3'",
+ "F == null && IO == null",
+ };
+ // @formatter:on
+
+ test(nonAnchorQueries, false);
+ }
+
+ @Test
+ void testNestedUnions() {
+ // @formatter:off
+ String[] anchorNestedUnions = new String[]{
+ "(F == '1' || F == '2') && (F == '3' || F == '4')",
+ "(F == '1' || F == '2') && (IO == '3' || IO == '4')",
+ "(IO == '1' || IO == '2') && (F == '3' || F == '4')",
+ "(F == '1' || IO == '2') && (F == '3' || IO == '4')",
+ "(IO == '1' || F == '2') && (IO == '3' || F == '4')",
+ };
+ // @formatter:on
+
+ test(anchorNestedUnions, true);
+ }
+
+ @Test
+ void testNestedIntersections() {
+ // @formatter:off
+ String[] anchorNestedIntersections = new String[]{
+ "(F == '1' && F == '2') || (F == '3' && F == '4')",
+ "(F == '1' && F == '2') || (IO == '3' && IO == '4')",
+ "(IO == '1' && IO == '2') || (F == '3' && F == '4')",
+ "(F == '1' && IO == '2') || (F == '3' && IO == '4')",
+ "(IO == '1' && F == '2') || (IO == '3' && F == '4')",
+ };
+ // @formatter:on
+
+ test(anchorNestedIntersections, true);
+ }
+
+ @Test
+ void testFullContentPhraseFunction() {
+ String query = "content:phrase(F, termOffsetMap, 'foo', 'bar') && F == 'foo' && F == 'bar'";
+ test(query, true);
+ }
+
+ @Test
+ void testArithmeticAndSizeMethods() {
+ // @formatter:off
+ String[] queries = new String[]{
+ // filter
+ "filter:getMinTime(F) == 1892160000000",
+ "filter:getMinTime(F) != 1892160000000",
+ "filter:getMinTime(F) > 1892160000000",
+ "filter:getMinTime(F) < 1892160000000",
+ "filter:getMinTime(F) >= 1892160000000",
+ "filter:getMinTime(F) <= 1892160000000",
+ // method
+ "F.size() == 1",
+ "F.size() != 1",
+ "F.size() > 1",
+ "F.size() < 1",
+ "F.size() >= 1",
+ "F.size() <= 1",
+ };
+ // @formatter:on
+
+ test(queries, false);
+ }
+
+ private void test(String[] queries, boolean expected) {
+ for (int i = 0; i < queries.length; i++) { // every query in the batch shares the same expected anchor state
+ test(queries[i], expected);
+ }
+ }
+
+ private void test(String query, boolean expected) {
+ JexlNode node = parseQuery(query); // first child of the parsed, flattened script
+ assertEquals(expected, getVisitor().isAnchor(node), "unexpected anchor state for query: " + query); // message names the failing query for array-driven tests
+ }
+
+ private JexlNode parseQuery(String query) {
+ try {
+ ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query);
+ return script.jjtGetChild(0); // tests operate on the first child of the script node
+ } catch (Exception e) {
+ fail("Could not parse query: " + query, e); // keep the parse exception as the failure cause
+ throw new IllegalStateException(e); // not reached when fail() throws; satisfies the compiler's return analysis
+ }
+ }
+
+ private AnchorDetectionVisitor getVisitor() {
+ if (visitor == null) {
+ visitor = new AnchorDetectionVisitor(indexedFields, indexOnlyFields);
+ }
+ return visitor;
+ }
+}
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index 5bc61292091..42a76e6f14a 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -361,8 +361,27 @@
+
+
+
+
+
+ TYPE
+
+
+
+ SHAPE
+
+
+
+
+
+
+
+
+
From c5bb43a61d5eb72cfac3025269f3ca2ddd0346c2 Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Thu, 17 Oct 2024 12:37:35 +0000
Subject: [PATCH 2/3] Wrap regex terms in eval only marker instead of rewriting
into filter function
---
.../jexl/visitors/RewriteRegexVisitor.java | 8 +-
.../pushdown/AnchorDetectionVisitor.java | 4 +-
.../test/java/datawave/query/ShapesTest.java | 4 +-
.../visitors/RewriteRegexVisitorTest.java | 158 +++++++++---------
.../pushdown/AnchorDetectionVisitorTest.java | 4 +-
5 files changed, 89 insertions(+), 89 deletions(-)
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java
index aaefde6d346..2a1195e71b8 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java
@@ -12,13 +12,13 @@
import datawave.query.Constants;
import datawave.query.jexl.JexlASTHelper;
-import datawave.query.jexl.JexlNodeFactory;
import datawave.query.jexl.NodeTypeCount;
import datawave.query.jexl.nodes.QueryPropertyMarker;
+import datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType;
import datawave.query.jexl.visitors.pushdown.AnchorDetectionVisitor;
/**
- * Rewrites regex terms as filter functions provided an anchor exists.
+ * Rewrites regex terms provided an anchor exists. Regex terms are wrapped in EvalOnly marker
*
* An anchor is an executable term or subtree.
*
@@ -175,8 +175,8 @@ public Object visit(ASTERNode node, Object data) {
String literal = (String) JexlASTHelper.getLiteralValue(node);
if (isNodeRewritableFromRules(field, literal)) {
- JexlNode rewrite = JexlNodeFactory.buildFunctionNode("filter", "includeRegex", field, literal);
- JexlNodes.replaceChild(node.jjtGetParent(), node, rewrite);
+ JexlNode marker = QueryPropertyMarker.create(node, MarkerType.EVALUATION_ONLY);
+ JexlNodes.replaceChild(node.jjtGetParent(), node, marker);
}
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
index 018658b1eb2..4740b298948 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
@@ -194,12 +194,12 @@ private Object visitMarker(QueryPropertyMarker.Instance instance) {
switch (instance.getType()) {
case BOUNDED_RANGE:
- case DELAYED:
- case EVALUATION_ONLY:
case EXCEEDED_OR:
case EXCEEDED_TERM:
case EXCEEDED_VALUE:
return true;
+ case DELAYED:
+ case EVALUATION_ONLY:
default:
return false;
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
index c51fe8154a6..d8661daa4c6 100644
--- a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
@@ -893,7 +893,7 @@ public void testRewriteRegexFromIncludes() throws Exception {
withQuery("ONLY_HEX == 'hexa' && TYPE =~ 'reg.*'");
withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
planAndExecuteQuery();
- assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(TYPE, 'reg.*')");
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (TYPE =~ 'reg.*'))");
}
@Test
@@ -909,6 +909,6 @@ public void testRewriteRegexWithExcludedFieldBecauseOfPatternMatch() throws Exce
withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hexag.*'");
withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
planAndExecuteQuery();
- assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(SHAPE, 'hexag.*')");
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (SHAPE =~ 'hexag.*'))");
}
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
index fd6add4b908..e77c8d6fc53 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
@@ -34,8 +34,8 @@ public void beforeEach() {
@Test
public void testSingleTermAndRegex() {
// term and indexed regex
- test("F == 'a' && F =~ 'ba.*'", "F == 'a' && filter:includeRegex(F, 'ba.*')");
- test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && filter:includeRegex(F, 'ba.*')");
+ test("F == 'a' && F =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))");
test("NA == 'a' && F =~ 'ba.*'");
// term and index only regex is never rewritten
@@ -44,9 +44,9 @@ public void testSingleTermAndRegex() {
test("NA == 'a' && IO =~ 'ba.*'");
// term and non-indexed regex is always rewritten
- test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && filter:includeRegex(NA, 'ba.*')");
- test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && filter:includeRegex(NA, 'ba.*')");
- test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && filter:includeRegex(NA, 'ba.*')");
+ test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
}
// A or regex
@@ -63,9 +63,9 @@ public void testSingleTermOrRegex() {
test("NA == 'a' || IO =~ 'ba.*'");
// top level union with non-indexed regex is a full table scan, do not rewrite
- test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || filter:includeRegex(NA, 'ba.*')");
- test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || filter:includeRegex(NA, 'ba.*')");
- test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || filter:includeRegex(NA, 'ba.*')");
+ test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
}
// (A and B) or regex
@@ -89,22 +89,22 @@ public void testNestedIntersectionOrRegex() {
// the input queries are non-executable, non-indexed field still gets rewritten
// all combinations of nested intersection and non-indexed regex
- test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || filter:includeRegex(NA, 'ba.*')");
- test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')");
- test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')");
- test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')");
- test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')");
- test("(NA == 'a' && NA == 'b') || Na =~ 'ba.*'", "(NA == 'a' && NA == 'b') || filter:includeRegex(Na, 'ba.*')");
+ test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(NA == 'a' && NA == 'b') || NA =~ 'ba.*'", "(NA == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
}
// (A or B) and regex
@Test
public void testNestedUnionAndRegex() {
// all combinations of nested intersection and indexed regex
- test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(F, 'ba.*')");
- test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')");
+ test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
test("(F == 'a' || NA == 'b') && F =~ 'ba.*'");
- test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')");
+ test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
test("(IO == 'a' || NA == 'b') && F =~ 'ba.*'");
test("(NA == 'a' || NA == 'b') && F =~ 'ba.*'");
@@ -117,21 +117,21 @@ public void testNestedUnionAndRegex() {
test("(NA == 'a' || NA == 'b') && IO =~ 'ba.*'");
// all combinations of nested intersection and non-indexed regex
- test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(NA, 'ba.*')");
- test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')");
- test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')");
- test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')");
- test("(IO == 'a' || NA == 'b') && NA =~ 'ba.*'", "(IO == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')");
- test("(NA == 'a' || NA == 'b') && Na =~ 'ba.*'", "(NA == 'a' || NA == 'b') && filter:includeRegex(Na, 'ba.*')");
+ test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' || NA == 'b') && NA =~ 'ba.*'", "(IO == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(NA == 'a' || NA == 'b') && NA =~ 'ba.*'", "(NA == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
}
// A and (B or regex)
@Test
public void testIntersectionWithNestedUnionWithSingleRegex() {
// top level indexed term, variable indexed state for nested term, indexed regex
- test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))");
- test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))");
- test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
// top level indexed term, variable indexed state for nested term, index only regex
test("F == 'a' && (F == 'b' || IO =~ 'ba.*')");
@@ -139,14 +139,14 @@ public void testIntersectionWithNestedUnionWithSingleRegex() {
test("F == 'a' && (NA == 'b' || IO =~ 'ba.*')");
// top level indexed term, variable indexed state for nested term, non-indexed regex
- test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
// top level index only term, variable indexed state for nested term, indexed regex
- test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))");
- test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))");
- test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))");
+ test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
// top level index only term, variable indexed state for nested term, index only regex
test("IO == 'a' && (F == 'b' || IO =~ 'ba.*')");
@@ -154,9 +154,9 @@ public void testIntersectionWithNestedUnionWithSingleRegex() {
test("IO == 'a' && (NA == 'b' || IO =~ 'ba.*')");
// top level index only term, variable indexed state for nested term, non-indexed regex
- test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
// top level non-indexed term, variable indexed state for nested term, indexed regex
test("NA == 'a' && (F == 'b' || F =~ 'ba.*')");
@@ -169,9 +169,9 @@ public void testIntersectionWithNestedUnionWithSingleRegex() {
test("NA == 'a' && (NA == 'b' || IO =~ 'ba.*')");
// top level non-indexed term, variable indexed state for nested term, non-indexed regex
- test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))");
- test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))");
+ test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
}
// A or (B and regex)
@@ -227,67 +227,67 @@ public void testUnionWithNestedIntersectionWithSingleRegex() {
@Test
public void testIntersectionWithNestedUnionOfRegexes() {
// indexed term and union of regexes with all possible index states
- test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))");
- test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')");
- test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))");
+ test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || IO =~ 'ac.*')");
+ test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("F == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
- test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
- test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
// index only term and union of regexes with all possible index states
- test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))");
- test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')");
- test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))");
+ test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || IO =~ 'ac.*')");
+ test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("IO == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
- test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
- test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
// non-indexed tem and union of regexes with all possible index states
test("NA == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')");
test("NA == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')");
- test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("NA == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
- test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))");
- test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
}
// A or (regex and regex)
@Test
public void testUnionWithNestedIntersectionOfRegexes() {
// indexed term or intersection of regexes with all possible index states
- test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
- test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
- test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("F == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
- test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
- test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+ test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
// index only term or intersection of regexes with all possible index states
- test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
- test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
- test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("IO == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
- test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
- test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+ test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
// non-indexed tem or intersection of regexes with all possible index states
- test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')");
- test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')");
- test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
test("NA == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
- test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))");
- test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))");
+ test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
}
// (A or regex) and (B or regex)
@Test
public void testNestedUnionsWithDistributedRegexes() {
String query = "(F == 'a' || F =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')";
- String expected = "(F == 'a' || filter:includeRegex(F, 'ab.*')) && (F == 'b' || F =~ 'ac.*')";
+ String expected = "(F == 'a' || ((_Eval_ = true) && (F =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')";
test(query, expected);
query = "(F == 'a' || NA =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')";
- expected = "(F == 'a' || filter:includeRegex(NA, 'ab.*')) && (F == 'b' || F =~ 'ac.*')";
+ expected = "(F == 'a' || ((_Eval_ = true) && (NA =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')";
test(query, expected);
}
@@ -295,7 +295,7 @@ public void testNestedUnionsWithDistributedRegexes() {
@Test
public void testNestedIntersectionsWithDistributedRegexes() {
String query = "(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')";
- String expected = "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))";
+ String expected = "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))";
test(query, expected);
}
@@ -303,7 +303,7 @@ public void testNestedIntersectionsWithDistributedRegexes() {
@Test
public void testPartialAnchorAndNestedUnionRegex() {
String query = "(F == 'a' || F == 'b') && (F =~ 'ab.*' || F =~ 'ac.*')";
- String expected = "(F == 'a' || F == 'b') && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))";
+ String expected = "(F == 'a' || F == 'b') && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))";
test(query, expected);
}
@@ -311,7 +311,7 @@ public void testPartialAnchorAndNestedUnionRegex() {
@Test
public void testLeftAnchorAndDeeplyNestedRegex() {
String query = "F == 'a' && (F == 'b' || (F == 'c' && F =~ 'ab.*'))";
- String expected = "F == 'a' && (F == 'b' || (F == 'c' && filter:includeRegex(F, 'ab.*')))";
+ String expected = "F == 'a' && (F == 'b' || (F == 'c' && ((_Eval_ = true) && (F =~ 'ab.*'))))";
test(query, expected);
}
@@ -319,14 +319,14 @@ public void testLeftAnchorAndDeeplyNestedRegex() {
@Test
public void testRightAnchorAndDeeplyNestedRegex() {
String query = "((F =~ 'ab.*' && F == 'c') || F == 'b') && F == 'a'";
- String expected = "((filter:includeRegex(F, 'ab.*') && F == 'c') || F == 'b') && F == 'a'";
+ String expected = "((((_Eval_ = true) && (F =~ 'ab.*')) && F == 'c') || F == 'b') && F == 'a'";
test(query, expected);
}
@Test
public void testUnionOfTwoLegalRewrites() {
String query = "(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')";
- String expected = "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))";
+ String expected = "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))";
test(query, expected);
}
@@ -340,13 +340,13 @@ public void testUnionOfTwoIllegalRewrites() {
@Test
public void testIncludeFieldsPreventNoRewrites() {
withIncludeFields(Set.of("F", "F2"));
- test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && filter:includeRegex(F2, 'ac.*')");
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && ((_Eval_ = true) && (F2 =~ 'ac.*'))");
}
@Test
public void testIncludeFieldsPreventSomeLegalRewrites() {
withIncludeFields(Set.of("F2"));
- test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && filter:includeRegex(F2, 'ac.*')");
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && ((_Eval_ = true) && (F2 =~ 'ac.*'))");
}
@Test
@@ -358,7 +358,7 @@ public void testExcludeFieldsPreventAllLegalRewrites() {
@Test
public void testExcludeFieldsPreventSomeLegalRewrites() {
withExcludeFields(Set.of("F2"));
- test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && F2 =~ 'ac.*'");
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && F2 =~ 'ac.*'");
}
@Test
@@ -374,7 +374,7 @@ public void testPatternBeatsExcludeFields() {
withPattern("F", "zz.*");
withExcludeFields(Set.of("F"));
// pattern beats exclude fields
- test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
}
@Test
@@ -382,7 +382,7 @@ public void testPatternBeatsIncludeFields() {
withPattern("F", "zz.*");
withIncludeFields(Set.of("F2"));
// pattern beats include fields
- test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
}
@Test
@@ -391,7 +391,7 @@ public void testPatternBeatsIncludeAndExcludeFields() {
withIncludeFields(Set.of("F2"));
withExcludeFields(Set.of("F"));
// pattern beats include fields
- test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')");
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
}
/**
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
index 6b63061f7a3..a9be296a03c 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
@@ -115,8 +115,6 @@ void testMarkers() {
// @formatter:off
String[] anchorMarkers = new String[] {
"((_Bounded_ = true) && (F > '2' && F < '5'))",
- "((_Delayed_ = true) && (F == '1'))",
- "((_Eval_ = true) && (F == '1'))",
"((_List_ = true) && ((id = 'id') && (field = 'F') && (params = '{\"ranges\":[[\"[r1\",\"r2]\"],[\"[r3\",\"f4]\"]]}')))",
"((_Value_ = true) && (F =~ 'ba.*'))",
"((_Term_ = true) && (_ANYFIELD_ =~ 'ba.*'))"
@@ -127,6 +125,8 @@ void testMarkers() {
// @formatter:off
String[] nonAnchorMarkers = new String[]{
+ "((_Delayed_ = true) && (F == '1'))",
+ "((_Eval_ = true) && (F == '1'))",
"((_Hole_ = true) && (F == '1'))",
"((_Drop_ = true) && (F == '1'))",
"((_Lenient_ = true) && (F == '1'))",
From 0edacf738abb3ea720bff2e73a9c1f48655e4824 Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Tue, 5 Nov 2024 16:55:54 +0000
Subject: [PATCH 3/3] Extract regex options into class that supports pre index
expansion and post index expansion operations
---
.../query/planner/DefaultQueryPlanner.java | 65 ++++++--------
.../query/planner/RegexRewriteOptions.java | 87 +++++++++++++++++++
.../datawave/query/QueryLogicFactory.xml | 17 +++-
3 files changed, 127 insertions(+), 42 deletions(-)
create mode 100644 warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index 1caf556bb7d..cf455b2a68b 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -309,10 +309,7 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
/**
* Controls optimistic rewriting of regex terms as filter functions, preserving overall query executability
*/
- protected boolean rewriteRegexTerms = false;
- protected Set regexIncludeFields;
- protected Set regexExcludeFields;
- protected Set regexRewritePatterns;
+ private RegexRewriteOptions regexRewriteOptions;
// handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating)
private TimedVisitorManager visitorManager = new TimedVisitorManager();
@@ -349,6 +346,7 @@ protected DefaultQueryPlanner(DefaultQueryPlanner other) {
rangeStreamClass = other.rangeStreamClass;
setSourceLimit(other.sourceLimit);
setPushdownThreshold(other.getPushdownThreshold());
+ setRegexRewriteOptions(other.getRegexRewriteOptions());
setVisitorManager(other.getVisitorManager());
setTransformRules(other.getTransformRules() == null ? null : new ArrayList<>(other.transformRules));
}
@@ -823,8 +821,13 @@ protected ASTJexlScript updateQueryTree(ScannerFactory scannerFactory, MetadataH
}
// rewrite regex nodes, optimistically
- if (rewriteRegexTerms) {
- RewriteRegexVisitor.rewrite(config.getQueryTree(), indexedFields, indexOnlyFields, regexIncludeFields, regexExcludeFields, regexRewritePatterns);
+ if (regexRewriteOptions != null && regexRewriteOptions.isPreExpansionEnabled()) {
+ // @formatter:off
+ RewriteRegexVisitor.rewrite(config.getQueryTree(), indexedFields, indexOnlyFields,
+ regexRewriteOptions.getPreExpansionIncludeFields(),
+ regexRewriteOptions.getPreExpansionExcludeFields(),
+ regexRewriteOptions.getPreExpansionPatterns());
+ // @formatter:on
}
if (disableBoundedLookup) {
@@ -986,6 +989,16 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard
config.setQueryTree(timedPushFunctions(timers, config.getQueryTree(), config, metadataHelper));
}
+ // rewrite regex nodes post index expansion, optimistically
+ if (regexRewriteOptions != null && regexRewriteOptions.isPostExpansionEnabled()) {
+ // @formatter:off
+ RewriteRegexVisitor.rewrite(config.getQueryTree(), indexedFields, indexOnlyFields,
+ regexRewriteOptions.getPostExpansionIncludeFields(),
+ regexRewriteOptions.getPostExpansionExcludeFields(),
+ regexRewriteOptions.getPostExpansionPatterns());
+ // @formatter:on
+ }
+
if (executableExpansion) {
config.setQueryTree(timedExecutableExpansion(timers, config.getQueryTree(), config, metadataHelper));
}
@@ -3254,38 +3267,6 @@ public static void setMaxTermsToPrint(int maxTermsToPrint) {
DefaultQueryPlanner.maxTermsToPrint = maxTermsToPrint;
}
- public boolean isRewriteRegexTerms() {
- return rewriteRegexTerms;
- }
-
- public void setRewriteRegexTerms(boolean rewriteRegexTerms) {
- this.rewriteRegexTerms = rewriteRegexTerms;
- }
-
- public Set getRegexIncludeFields() {
- return regexIncludeFields;
- }
-
- public void setRegexIncludeFields(Set regexIncludeFields) {
- this.regexIncludeFields = regexIncludeFields;
- }
-
- public Set getRegexExcludeFields() {
- return regexExcludeFields;
- }
-
- public void setRegexExcludeFields(Set regexExcludeFields) {
- this.regexExcludeFields = regexExcludeFields;
- }
-
- public Set getRegexRewritePatterns() {
- return regexRewritePatterns;
- }
-
- public void setRegexRewritePatterns(Set regexRewritePatterns) {
- this.regexRewritePatterns = regexRewritePatterns;
- }
-
/**
* Given a date, truncate it to year, month, date and increment the day by one to determine the following day.
*
@@ -3304,4 +3285,12 @@ public void finalize() {
builderThread.shutdown();
}
}
+
+ public RegexRewriteOptions getRegexRewriteOptions() {
+ return regexRewriteOptions;
+ }
+
+ public void setRegexRewriteOptions(RegexRewriteOptions regexRewriteOptions) {
+ this.regexRewriteOptions = regexRewriteOptions;
+ }
}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java b/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java
new file mode 100644
index 00000000000..966a957ec87
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java
@@ -0,0 +1,87 @@
+package datawave.query.planner;
+
+import java.util.Collections;
+import java.util.Set;
+
+import datawave.query.jexl.visitors.RegexRewritePattern;
+import datawave.query.jexl.visitors.RewriteRegexVisitor;
+
+/**
+ * Provides fine-grained control over how the {@link RewriteRegexVisitor} operates pre and post index expansion
+ */
+public class RegexRewriteOptions {
+
+ private boolean preExpansionEnabled = false;
+ private Set preExpansionIncludeFields = Collections.emptySet();
+ private Set preExpansionExcludeFields = Collections.emptySet();
+ private Set preExpansionPatterns = Collections.emptySet();
+
+ private boolean postExpansionEnabled = false;
+ private Set postExpansionIncludeFields = Collections.emptySet();
+ private Set postExpansionExcludeFields = Collections.emptySet();
+ private Set postExpansionPatterns = Collections.emptySet();
+
+ public boolean isPreExpansionEnabled() {
+ return preExpansionEnabled;
+ }
+
+ public void setPreExpansionEnabled(boolean preExpansionEnabled) {
+ this.preExpansionEnabled = preExpansionEnabled;
+ }
+
+ public Set getPreExpansionIncludeFields() {
+ return preExpansionIncludeFields;
+ }
+
+ public void setPreExpansionIncludeFields(Set preExpansionIncludeFields) {
+ this.preExpansionIncludeFields = preExpansionIncludeFields;
+ }
+
+ public Set getPreExpansionExcludeFields() {
+ return preExpansionExcludeFields;
+ }
+
+ public void setPreExpansionExcludeFields(Set preExpansionExcludeFields) {
+ this.preExpansionExcludeFields = preExpansionExcludeFields;
+ }
+
+ public Set getPreExpansionPatterns() {
+ return preExpansionPatterns;
+ }
+
+ public void setPreExpansionPatterns(Set preExpansionPatterns) {
+ this.preExpansionPatterns = preExpansionPatterns;
+ }
+
+ public boolean isPostExpansionEnabled() {
+ return postExpansionEnabled;
+ }
+
+ public void setPostExpansionEnabled(boolean postExpansionEnabled) {
+ this.postExpansionEnabled = postExpansionEnabled;
+ }
+
+ public Set getPostExpansionIncludeFields() {
+ return postExpansionIncludeFields;
+ }
+
+ public void setPostExpansionIncludeFields(Set postExpansionIncludeFields) {
+ this.postExpansionIncludeFields = postExpansionIncludeFields;
+ }
+
+ public Set getPostExpansionExcludeFields() {
+ return postExpansionExcludeFields;
+ }
+
+ public void setPostExpansionExcludeFields(Set postExpansionExcludeFields) {
+ this.postExpansionExcludeFields = postExpansionExcludeFields;
+ }
+
+ public Set getPostExpansionPatterns() {
+ return postExpansionPatterns;
+ }
+
+ public void setPostExpansionPatterns(Set postExpansionPatterns) {
+ this.postExpansionPatterns = postExpansionPatterns;
+ }
+}
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index 29401d03c43..4c4e5f4fd3b 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -368,12 +368,21 @@
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+