From 65f81aeff5d2eb4d3f28c4ddd5794ed4c148c245 Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Mon, 30 Dec 2024 17:17:08 +0000
Subject: [PATCH 1/2] Expand bounded range test assertions to handle
 different order of operations that arise from threading (#2677)

---
 .../jexl/lookups/BoundedRangeIndexLookupTest.java | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/lookups/BoundedRangeIndexLookupTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/lookups/BoundedRangeIndexLookupTest.java
index df4dcd3b25..64f14db044 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/lookups/BoundedRangeIndexLookupTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/lookups/BoundedRangeIndexLookupTest.java
@@ -228,6 +228,13 @@ public void testInvalidDataTypeFilter() {
         test(lookup, "FIELD_A");
     }
 
+    @Test
+    public void testWithNoBackingData() {
+        withDateRange("20240701", "20240709");
+        BoundedRangeIndexLookup lookup = createLookup("FIELD_A", "absent-lower", "absent-upper");
+        test(lookup, "FIELD_A");
+    }
+
     private void test(BoundedRangeIndexLookup lookup, String field) {
         lookup.submit();
 
@@ -305,7 +312,12 @@ public void largeRowInBoundedRangeTest() throws TableNotFoundException {
             assertEquals(10001, scanner.getSeekCount()); // with new iterator this is initial seek + one seek per unique row in the range
             // this represents data collapsed and sent back to the client by the WholeRowIterator
             assertEquals(0, scanner.getNextCount()); // no next cals with seeking filter
-            assertTrue(map.get("FOO").isThresholdExceeded());
+            assertNotNull(map);
+            if (map.containsKey("FOO")) {
+                assertTrue(map.get("FOO").isThresholdExceeded());
+            } else {
+                assertTrue(map.isEmpty());
+            }
             verifyAll();
         }
     }

From df77935d9f017e5671511dc6edb3ee673e6aea0d Mon Sep 17 00:00:00 2001
From: austin007008 <143425397+austin007008@users.noreply.github.com>
Date: Mon, 30 Dec 2024 14:53:34 -0500
Subject: [PATCH 2/2] replace deprecated config options (#2169)

Co-authored-by: emiliodskinner
---
 .../src/main/resources/bin/ingest/bulk-ingest.sh            | 4 ++--
 .../src/main/resources/bin/ingest/live-ingest.sh            | 4 ++--
 .../src/test/resources/datawave/mapreduce/MapReduceJobs.xml | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/bulk-ingest.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/bulk-ingest.sh
index fe30703f42..7936e750c9 100755
--- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/bulk-ingest.sh
+++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/bulk-ingest.sh
@@ -72,9 +72,9 @@ export HADOOP_OPTS="-Dfile.encoding=UTF8 -Duser.timezone=GMT $HADOOP_INGEST_OPTS
 export CHILD_MAP_OPTS="-Xmx${BULK_CHILD_MAP_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"
 export CHILD_REDUCE_OPTS="-Xmx${BULK_CHILD_REDUCE_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"
 
-echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapred.map.child.java.opts=\"$CHILD_MAP_OPTS\" -mapred.reduce.child.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
+echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapreduce.map.java.opts=\"$CHILD_MAP_OPTS\" -mapreduce.reduce.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
 
-$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapred.map.child.java.opts="$CHILD_MAP_OPTS" -mapred.reduce.child.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
+$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapreduce.map.java.opts="$CHILD_MAP_OPTS" -mapreduce.reduce.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
 
 RETURN_CODE=$?
 
diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/live-ingest.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/live-ingest.sh
index cf7d49007c..738b4a12ad 100755
--- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/live-ingest.sh
+++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/live-ingest.sh
@@ -71,10 +71,10 @@ export CHILD_MAP_OPTS="-Xmx${LIVE_CHILD_MAP_MAX_MEMORY_MB}m -XX:+UseConcMarkSwee
 export CHILD_REDUCE_OPTS="-Xmx${LIVE_CHILD_REDUCE_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"
 
-echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapred.map.child.java.opts=\"$CHILD_MAP_OPTS\" -mapred.reduce.child.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
+echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapreduce.map.java.opts=\"$CHILD_MAP_OPTS\" -mapreduce.reduce.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
 
 echo "For decreased latency, one can add the -mapOnly flag at the cost of possibly overcounting duplicate records"
 
-$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapred.map.child.java.opts="$CHILD_MAP_OPTS" -mapred.reduce.child.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
+$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapreduce.map.java.opts="$CHILD_MAP_OPTS" -mapreduce.reduce.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
 
 RETURN_CODE=$?
 
diff --git a/web-services/map-reduce/src/test/resources/datawave/mapreduce/MapReduceJobs.xml b/web-services/map-reduce/src/test/resources/datawave/mapreduce/MapReduceJobs.xml
index b8b4fc5d15..c5fa0ba674 100644
--- a/web-services/map-reduce/src/test/resources/datawave/mapreduce/MapReduceJobs.xml
+++ b/web-services/map-reduce/src/test/resources/datawave/mapreduce/MapReduceJobs.xml
@@ -58,7 +58,7 @@
-
+
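
The keys replaced throughout PATCH 2/2, mapred.map.child.java.opts and
mapred.reduce.child.java.opts, are Hadoop 1.x property names; since Hadoop 2
(MRv2) they are honored only through Hadoop's deprecation mapping and log a
deprecation warning on use, with mapreduce.map.java.opts and
mapreduce.reduce.java.opts as the supported equivalents. Below is a minimal
sketch of the same child-JVM settings applied through Hadoop's Java
configuration API; the class name and heap sizes are illustrative, not taken
from these scripts:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class ChildJvmOptsExample {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Current MRv2 names for the per-task child JVM options.
            conf.set("mapreduce.map.java.opts", "-Xmx2048m -Dfile.encoding=UTF8 -Duser.timezone=GMT");
            conf.set("mapreduce.reduce.java.opts", "-Xmx4096m -Dfile.encoding=UTF8 -Duser.timezone=GMT");
            Job job = Job.getInstance(conf, "ingest-example");
            // A value set under a deprecated key (e.g. mapred.map.child.java.opts)
            // would surface under the new key as well, via the deprecation mapping,
            // but only the new names avoid the runtime deprecation warning.
            System.out.println(job.getConfiguration().get("mapreduce.map.java.opts"));
        }
    }

The same rename applies wherever these keys appear, whether in *-site.xml
files or passed as per-job overrides on a command line, as in the ingest
scripts above.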