Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/integration' into task/protobuf-…
Browse files Browse the repository at this point in the history
…version-java
  • Loading branch information
avgAGB committed Jan 6, 2025
2 parents a3ed460 + df77935 commit d3236e0
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ export HADOOP_OPTS="-Dfile.encoding=UTF8 -Duser.timezone=GMT $HADOOP_INGEST_OPTS
export CHILD_MAP_OPTS="-Xmx${BULK_CHILD_MAP_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"
export CHILD_REDUCE_OPTS="-Xmx${BULK_CHILD_REDUCE_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"

echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapred.map.child.java.opts=\"$CHILD_MAP_OPTS\" -mapred.reduce.child.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapreduce.map.java.opts=\"$CHILD_MAP_OPTS\" -mapreduce.reduce.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS

$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapred.map.child.java.opts="$CHILD_MAP_OPTS" -mapred.reduce.child.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS
$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_(bulk)_.*\.flag' -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE -distCpConfDir $WAREHOUSE_HADOOP_CONF -mapreduce.map.java.opts="$CHILD_MAP_OPTS" -mapreduce.reduce.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS $EXTRA_OPTS

RETURN_CODE=$?

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ export CHILD_MAP_OPTS="-Xmx${LIVE_CHILD_MAP_MAX_MEMORY_MB}m -XX:+UseConcMarkSwee
export CHILD_REDUCE_OPTS="-Xmx${LIVE_CHILD_REDUCE_MAX_MEMORY_MB}m -XX:+UseConcMarkSweepGC -Dfile.encoding=UTF8 -Duser.timezone=GMT -XX:+UseNUMA $CHILD_INGEST_OPTS"


echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapred.map.child.java.opts=\"$CHILD_MAP_OPTS\" -mapred.reduce.child.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
echo $INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapreduce.map.java.opts=\"$CHILD_MAP_OPTS\" -mapreduce.reduce.java.opts=\"$CHILD_REDUCE_OPTS\" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
echo "For decreased latency, one can add the -mapOnly flag at the cost of possibly overcounting duplicate records"

$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapred.map.child.java.opts="$CHILD_MAP_OPTS" -mapred.reduce.child.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS
$INGEST_HADOOP_HOME/bin/hadoop jar ${DATAWAVE_INGEST_CORE_JAR} datawave.ingest.mapreduce.job.IngestJob -jt $INGEST_JOBTRACKER_NODE $INPUT_FILES ${INGEST_CONFIG[@]} -cacheBaseDir $JOB_CACHE_DIR -cacheJars $LIBJARS -user $USERNAME -pass $PASSWORD -instance $WAREHOUSE_INSTANCE_NAME -zookeepers $WAREHOUSE_ZOOKEEPERS -workDir $WORKDIR -flagFileDir ${FLAG_DIR} -flagFilePattern '.*_live_.*\.flag' -mapreduce.map.java.opts="$CHILD_MAP_OPTS" -mapreduce.reduce.java.opts="$CHILD_REDUCE_OPTS" "${BATCHWRITER_OPTS}" $MAPRED_OPTS -outputMutations -mapOnly -srcHdfs $INGEST_HDFS_NAME_NODE -destHdfs $WAREHOUSE_HDFS_NAME_NODE $EXTRA_OPTS

RETURN_CODE=$?

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,13 @@ public void testInvalidDataTypeFilter() {
test(lookup, "FIELD_A");
}

@Test
public void testWithNoBackingData() {
withDateRange("20240701", "20240709");
BoundedRangeIndexLookup lookup = createLookup("FIELD_A", "absent-lower", "absent-upper");
test(lookup, "FIELD_A");
}

private void test(BoundedRangeIndexLookup lookup, String field) {
lookup.submit();

Expand Down Expand Up @@ -305,7 +312,12 @@ public void largeRowInBoundedRangeTest() throws TableNotFoundException {
assertEquals(10001, scanner.getSeekCount()); // with new iterator this is initial seek + one seek per unique row in the range
// this represents data collapsed and sent back to the client by the WholeRowIterator
assertEquals(0, scanner.getNextCount()); // no next cals with seeking filter
assertTrue(map.get("FOO").isThresholdExceeded());
assertNotNull(map);
if (map.containsKey("FOO")) {
assertTrue(map.get("FOO").isThresholdExceeded());
} else {
assertTrue(map.isEmpty());
}
verifyAll();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
</property>
<property name="jobConfigurationProperties">
<map key-type="java.lang.String" value-type="java.lang.Object">
<entry key="mapred.map.tasks.speculative.execution" value="false" />
<entry key="mapreduce.map.speculative" value="false" />
</map>
</property>
<property name="jobSystemProperties">
Expand Down

0 comments on commit d3236e0

Please sign in to comment.