Skip to content

Commit

Permalink
Geo query optimization (#1710)
Browse files Browse the repository at this point in the history
* Created a GeoUtils class which can be used to optimize queries against geo type fields.

* Addressed PR feedback

* Updated type-utils version to 1.10

* Updated the autoupdate workflow to use ubuntu-latest to avoid the scheduled GitHub Ubuntu brownout

* Updated type-utils to 1.11, which includes JTS 1.19.0

* Fixed test broken by merge
  • Loading branch information
jwomeara authored Nov 17, 2022
1 parent 0646600 commit 1f2d107
Show file tree
Hide file tree
Showing 15 changed files with 856 additions and 76 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/autoupdate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
jobs:
autoupdate:
name: autoupdate
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: docker://chinthakagodawita/autoupdate-action:v1
continue-on-error: true
Expand Down
12 changes: 9 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
<version.curator>2.13.0</version.curator>
<version.deltaspike>1.9.0</version.deltaspike>
<version.easymock>4.0.2</version.easymock>
<version.geoserver>2.14.2</version.geoserver>
<version.geotools>20.1</version.geotools>
<version.geoserver>2.21.1</version.geoserver>
<version.geotools>27.1</version.geotools>
<version.geowave>1.1.0</version.geowave>
<version.google-guava>15.0</version.google-guava>
<version.googlecode-findbugs>2.0.3</version.googlecode-findbugs>
Expand All @@ -70,6 +70,7 @@
<version.jetty>6.1.26</version.jetty>
<version.jgroups>4.0.19.Final</version.jgroups>
<version.jjwt>0.11.2</version.jjwt>
<version.jts>1.19.0</version.jts>
<version.junit>4.13.2</version.junit>
<version.kryo>2.20</version.kryo>
<version.kryonet>2.20</version.kryonet>
Expand All @@ -86,7 +87,7 @@
<version.microservice.metadata-utils>1.10</version.microservice.metadata-utils>
<version.microservice.metrics-reporter>1.2</version.microservice.metrics-reporter>
<version.microservice.query-metric-api>1.5.7</version.microservice.query-metric-api>
<version.microservice.type-utils>1.8</version.microservice.type-utils>
<version.microservice.type-utils>1.11</version.microservice.type-utils>
<version.minlog>1.2</version.minlog>
<version.mysql-connector>8.0.16</version.mysql-connector>
<version.netty>4.1.42.Final</version.netty>
Expand Down Expand Up @@ -881,6 +882,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>${version.jts}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jul-to-slf4j</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
* Used to determine the maximum number of query ranges to generate when performing a geowave query against a PointType field.
*/
private int pointMaxExpansion = 32;
/**
* Used to determine the maximum number of query ranges to generate when performing a geo query against a GeoType field.
*/
private int geoMaxExpansion = 32;
/**
* Used during geowave range optimization to determine the minimum number of sub-ranges we should split a range into.
*/
Expand Down Expand Up @@ -450,6 +454,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) {
this.setRangeBufferPollMillis(other.getRangeBufferPollMillis());
this.setGeometryMaxExpansion(other.getGeometryMaxExpansion());
this.setPointMaxExpansion(other.getPointMaxExpansion());
this.setGeoMaxExpansion(other.getGeoMaxExpansion());
this.setGeoWaveRangeSplitThreshold(other.getGeoWaveRangeSplitThreshold());
this.setGeoWaveMaxRangeOverlap(other.getGeoWaveMaxRangeOverlap());
this.setOptimizeGeoWaveRanges(other.isOptimizeGeoWaveRanges());
Expand Down Expand Up @@ -926,6 +931,14 @@ public void setPointMaxExpansion(int pointMaxExpansion) {
this.pointMaxExpansion = pointMaxExpansion;
}

/**
 * Gets the maximum number of query ranges to generate when performing a geo query against a GeoType field.
 *
 * @return the current value of {@code geoMaxExpansion}
 */
public int getGeoMaxExpansion() {
return geoMaxExpansion;
}

/**
 * Sets the maximum number of query ranges to generate when performing a geo query against a GeoType field.
 *
 * @param geoMaxExpansion
 *            the new value to store in {@code geoMaxExpansion}
 */
public void setGeoMaxExpansion(int geoMaxExpansion) {
this.geoMaxExpansion = geoMaxExpansion;
}

public int getGeoWaveRangeSplitThreshold() {
return geoWaveRangeSplitThreshold;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import datawave.query.jexl.nodes.BoundedRange;
import datawave.query.jexl.visitors.EventDataQueryExpressionVisitor;
import datawave.query.util.DateIndexHelper;
import datawave.query.util.GeoUtils;
import datawave.query.util.MetadataHelper;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.commons.jexl2.parser.ASTFunctionNode;
Expand All @@ -30,8 +31,9 @@
import org.apache.log4j.Logger;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.CoordinateXY;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.MultiPolygon;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.util.GeometricShapeFactory;

Expand Down Expand Up @@ -86,9 +88,23 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel
if (args.size() == 3) {
double[] ll = geoNormalizer.parseLatLon(args.get(1).image);
double[] ur = geoNormalizer.parseLatLon(args.get(2).image);
char splitChar = args.get(1).image.charAt(geoNormalizer.findSplit(args.get(1).image));

returnNode = getIndexNode(args.get(0), ll[1], ur[1], ll[0], ur[0], Character.toString(splitChar));
// is the lower left longitude greater than the upper right longitude?
// if so, we have crossed the anti-meridian and should split
Geometry geom;
List<Envelope> envs = new ArrayList<>();
if (ll[1] > ur[1]) {
Polygon poly1 = createRectangle(ll[1], 180.0, ll[0], ur[0]);
Polygon poly2 = createRectangle(-180.0, ur[1], ll[0], ur[0]);
geom = createGeometryCollection(poly1, poly2);
envs.add(poly1.getEnvelopeInternal());
envs.add(poly2.getEnvelopeInternal());
} else {
geom = createRectangle(ll[1], ur[1], ll[0], ur[0]);
envs.add(geom.getEnvelopeInternal());
}

returnNode = getIndexNode(geom, envs, getFieldNames(args.get(0)), config.getGeoMaxExpansion());
} else {

double minLat, maxLat, minLon, maxLon;
Expand Down Expand Up @@ -128,7 +144,7 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel
BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(geLatNode2, leLatNode2)))));

// link em up
returnNode = JexlNodeFactory.createAndNode(Arrays.asList(andNode1, andNode2));
returnNode = JexlNodeFactory.createOrNode(Arrays.asList(andNode1, andNode2));
} else {
JexlNode geLonNode = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), args.get(0), Double.toString(minLon));
JexlNode leLonNode = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), args.get(0), Double.toString(maxLon));
Expand Down Expand Up @@ -173,26 +189,47 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel
double lat = c.getLatitude();
double lon = c.getLongitude();

returnNode = getIndexNode(args.get(0), lon - radius, lon + radius, lat - radius, lat + radius, GeoNormalizer.separator);

returnNode = getIndexNode(createCircle(lon, lat, radius), getFieldNames(args.get(0)), config.getGeoMaxExpansion());
}
return returnNode;
}

public static JexlNode getIndexNode(JexlNode fieldsNode, double minLon, double maxLon, double minLat, double maxLat, String splitChar) {
JexlNode indexNode;
List<JexlNode> indexNodes = Lists.newArrayList();
if (fieldsNode.jjtGetNumChildren() > 1) {
for (int i = 0; i < fieldsNode.jjtGetNumChildren(); i++) {
JexlNode kid = JexlASTHelper.dereference(fieldsNode.jjtGetChild(i));
public static List<String> getFieldNames(JexlNode node) {
List<String> fieldNames = new ArrayList<>();
if (node.jjtGetNumChildren() > 1) {
for (int i = 0; i < node.jjtGetNumChildren(); i++) {
JexlNode kid = JexlASTHelper.dereference(node.jjtGetChild(i));
if (kid.image != null) {
indexNodes.add(getIndexNode(kid.image, minLon, maxLon, minLat, maxLat, splitChar));
fieldNames.add(kid.image);
}
}
} else {
indexNodes.add(getIndexNode(fieldsNode.image, minLon, maxLon, minLat, maxLat, splitChar));
fieldNames.add(node.image);
}
return fieldNames;
}

/**
 * Convenience overload that builds the index node for a geometry using the geometry's own internal envelope as the only envelope.
 *
 * @param geometry
 *            the query geometry
 * @param fieldNames
 *            the fields to build ranges for
 * @param maxExpansion
 *            passed through unchanged to the envelope-aware overload
 * @return the node produced by the envelope-aware {@code getIndexNode} overload
 */
public static JexlNode getIndexNode(Geometry geometry, List<String> fieldNames, int maxExpansion) {
    final Envelope boundingEnvelope = geometry.getEnvelopeInternal();
    final List<Envelope> envelopes = Collections.singletonList(boundingEnvelope);
    return getIndexNode(geometry, envelopes, fieldNames, maxExpansion);
}

public static JexlNode getIndexNode(Geometry geometry, List<Envelope> envs, List<String> fieldNames, int maxExpansion) {
List<String[]> indexRanges = GeoUtils.generateOptimizedIndexRanges(geometry, envs, maxExpansion);

List<JexlNode> indexNodes = Lists.newArrayList();
for (String fieldName : fieldNames) {
if (fieldName != null) {
for (String[] indexRange : indexRanges) {
// @formatter:off
indexNodes.add(BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(
JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, indexRange[0]),
JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, indexRange[1])))));
// @formatter:on
}
}
}

JexlNode indexNode;
if (!indexNodes.isEmpty()) {
if (indexNodes.size() > 1) {
indexNode = JexlNodeFactory.createOrNode(indexNodes);
Expand All @@ -206,37 +243,6 @@ public static JexlNode getIndexNode(JexlNode fieldsNode, double minLon, double m
return indexNode;
}

/**
 * Builds the index range node for a single field over a latitude/longitude bounding box.
 * <p>
 * Range bounds are composed as {@code lat + splitChar + lon}. When {@code minLon > maxLon} the box crosses the anti-meridian and is
 * split into two bounded ranges: one running up to longitude 180 and one starting at longitude -180. The two halves are OR'ed
 * together, because a term only has to fall within one half of the split box to match.
 *
 * @param fieldName
 *            the field to query
 * @param minLon
 *            the lower left longitude
 * @param maxLon
 *            the upper right longitude
 * @param minLat
 *            the lower left latitude
 * @param maxLat
 *            the upper right latitude
 * @param splitChar
 *            the separator placed between the latitude and longitude in each range bound
 * @return a single bounded range, or an OR of two bounded ranges when the box crosses the anti-meridian
 */
public static JexlNode getIndexNode(String fieldName, double minLon, double maxLon, double minLat, double maxLat, String splitChar) {
    // is the lower left longitude greater than the upper right longitude?
    // if so, we have crossed the anti-meridian and should split
    if (minLon > maxLon) {
        // first half: from minLon up to the anti-meridian at 180
        JexlNode geNode1 = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, minLat + splitChar + minLon);
        JexlNode leNode1 = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, maxLat + splitChar + "180");
        JexlNode range1 = BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(geNode1, leNode1)));

        // second half: from the anti-meridian at -180 up to maxLon
        JexlNode geNode2 = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, minLat + splitChar + "-180");
        JexlNode leNode2 = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, maxLat + splitChar + maxLon);
        JexlNode range2 = BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(geNode2, leNode2)));

        // the split halves form a union, so they must be OR'ed; AND'ing them would require a term to fall in both halves at once
        return JexlNodeFactory.createOrNode(Arrays.asList(range1, range2));
    }

    JexlNode geNode = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, minLat + splitChar + minLon);
    JexlNode leNode = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, maxLat + splitChar + maxLon);

    // a box that does not cross the anti-meridian is a single bounded range
    return BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(geNode, leNode)));
}

@Override
public void addFilters(AttributeFactory attributeFactory, Map<String,EventDataQueryExpressionVisitor.ExpressionFilter> filterMap) {
// noop, covered by getIndexQuery (see comments on interface)
Expand Down Expand Up @@ -370,7 +376,7 @@ public JexlNode toGeoWaveFunction(Set<String> fields) throws Exception {
// is the lower left longitude greater than the upper right longitude?
// if so, we have crossed the anti-meridian and should split
if (ll[1] > ur[1]) {
wkt = createMultiPolygon(createRectangle(ll[1], 180.0, ll[0], ur[0]), createRectangle(-180.0, ur[1], ll[0], ur[0])).toText();
wkt = createGeometryCollection(createRectangle(ll[1], 180.0, ll[0], ur[0]), createRectangle(-180.0, ur[1], ll[0], ur[0])).toText();
} else {
wkt = createRectangle(ll[1], ur[1], ll[0], ur[0]).toText();
}
Expand Down Expand Up @@ -420,9 +426,9 @@ private Polygon createRectangle(double minLon, double maxLon, double minLat, dou
return geomFactory.createPolygon(coordinates.toArray(new Coordinate[0]));
}

private MultiPolygon createMultiPolygon(Polygon poly1, Polygon poly2) {
private Geometry createGeometryCollection(Polygon poly1, Polygon poly2) {
GeometryFactory geomFactory = new GeometryFactory();
return geomFactory.createMultiPolygon(new Polygon[] {poly1, poly2});
return geomFactory.createGeometryCollection(new Geometry[] {poly1, poly2});
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,9 @@ protected static JexlNode getIndexNode(String fieldName, Geometry geometry, List
else if (indexTypes.remove(IndexType.GEOWAVE_POINT)) {
indexNodes.add(generateGeoWaveRanges(fieldName, geometry, envs, config, PointNormalizer.index, config.getPointMaxExpansion()));
}

// generate ranges for geo points
if (indexTypes.remove(IndexType.GEO_POINT)) {
indexNodes.add(generateGeoRanges(fieldName, envs));
else if (indexTypes.remove(IndexType.GEO_POINT)) {
indexNodes.add(generateGeoRanges(fieldName, geometry, envs, config.getGeoMaxExpansion()));
}

JexlNode indexNode;
Expand Down Expand Up @@ -223,19 +222,17 @@ protected static JexlNode generateGeoWaveRanges(String fieldName, Geometry geome
return JexlNodeFactory.createOrNode(rangeNodes);
}

protected static JexlNode generateGeoRanges(String fieldName, List<Envelope> envs) {
protected static JexlNode generateGeoRanges(String fieldName, Geometry geometry, List<Envelope> envs, int maxExpansion) {
JexlNode indexNode;
List<JexlNode> indexNodes = new ArrayList<>();
for (Envelope env : envs) {
// @formatter:off
indexNodes.add(
GeoFunctionsDescriptor.GeoJexlArgumentDescriptor.getIndexNode(
fieldName,
env.getMinX(),
env.getMaxX(),
env.getMinY(),
env.getMaxY(),
GeoNormalizer.separator));
geometry,
envs,
Collections.singletonList(fieldName),
maxExpansion));
// @formatter:on
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import com.google.common.collect.Multimap;
import datawave.data.normalizer.GeometryNormalizer;
import datawave.data.type.AbstractGeometryType;
import datawave.data.type.GeoType;
import datawave.data.type.Type;
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.functions.GeoWaveFunctionsDescriptor;
import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory;
import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor;
import datawave.query.util.GeoUtils;
import datawave.query.util.GeoWaveUtils;
import datawave.query.util.MetadataHelper;
import datawave.webservice.common.logging.ThreadConfigurableLogger;
Expand Down Expand Up @@ -82,7 +84,19 @@ public Object visit(ASTAndNode node, Object data) {

private boolean isPrunable(GeoWaveFunctionsDescriptor.GeoWaveJexlArgumentDescriptor geoWaveDesc) {
Set<String> fields = geoWaveDesc.fields(metadataHelper, null);
return fields.stream().anyMatch(field -> getDatatypesForField(field).stream().anyMatch(type -> type instanceof AbstractGeometryType));
// @formatter:off
return fields.stream().anyMatch(
field -> getDatatypesForField(field).stream().anyMatch(
type -> (type instanceof AbstractGeometryType || type instanceof GeoType)));
// @formatter:on
}

/**
 * Checks whether any datatype returned by {@code getDatatypesForField} for the given field is an {@link AbstractGeometryType}.
 *
 * @param field
 *            the field name to look up
 * @return true if at least one datatype is an {@code AbstractGeometryType}, false otherwise
 */
private boolean isGeoWaveType(String field) {
    for (Type<?> datatype : getDatatypesForField(field)) {
        if (datatype instanceof AbstractGeometryType) {
            return true;
        }
    }
    return false;
}

/**
 * Checks whether any datatype returned by {@code getDatatypesForField} for the given field is a {@link GeoType}.
 *
 * @param field
 *            the field name to look up
 * @return true if at least one datatype is a {@code GeoType}, false otherwise
 */
private boolean isGeoType(String field) {
    for (Type<?> datatype : getDatatypesForField(field)) {
        if (datatype instanceof GeoType) {
            return true;
        }
    }
    return false;
}

private Set<Type<?>> getDatatypesForField(String field) {
Expand Down Expand Up @@ -133,7 +147,11 @@ public Object visit(ASTEQNode node, Object data) {
if (value != null) {
Geometry nodeGeometry = null;
try {
nodeGeometry = GeoWaveUtils.positionToGeometry(value);
if (isGeoWaveType(field)) {
nodeGeometry = GeoWaveUtils.positionToGeometry(value);
} else if (isGeoType(field)) {
nodeGeometry = GeoUtils.indexToGeometry(value);
}
} catch (Exception e) {
log.warn("Unable to extract geometry from geo term: " + value, e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2312,6 +2312,14 @@ public void setPointMaxExpansion(int pointMaxExpansion) {
getConfig().setPointMaxExpansion(pointMaxExpansion);
}

/**
 * Gets the geo max expansion by delegating to the object returned by {@code getConfig()}.
 *
 * @return the value of {@code getConfig().getGeoMaxExpansion()}
 */
public int getGeoMaxExpansion() {
return getConfig().getGeoMaxExpansion();
}

/**
 * Sets the geo max expansion by delegating to the object returned by {@code getConfig()}.
 *
 * @param geoMaxExpansion
 *            the value to pass to {@code getConfig().setGeoMaxExpansion(int)}
 */
public void setGeoMaxExpansion(int geoMaxExpansion) {
getConfig().setGeoMaxExpansion(geoMaxExpansion);
}

public int getGeoWaveRangeSplitThreshold() {
return getConfig().getGeoWaveRangeSplitThreshold();
}
Expand Down
Loading

0 comments on commit 1f2d107

Please sign in to comment.