Skip to content

Commit

Permalink
Exit earlier from loop when a new Result is created
Browse files Browse the repository at this point in the history
 *   3) Cache the mapping from the long[] name to its String, to avoid
 * `ByteBuffer.allocate` and creating new UTF-8 strings. This was not
 * profiled, so it is only a guess that the map is a bit faster; since it
 * runs outside the main loop, the difference should be small anyway;
 *   4) Exit earlier from loop if a new entry was created.
  • Loading branch information
tivrfoa committed Jan 30, 2024
1 parent 7f0e517 commit e0ab6f6
Showing 1 changed file with 63 additions and 30 deletions.
93 changes: 63 additions & 30 deletions src/main/java/dev/morling/onebrc/CalculateAverage_tivrfoa.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,18 @@
* stay idle, so:
* 1) Create more chunks than threads, so the ones that finish first
* can do something;
* 2) Decrease chunk sizes as we get closer to the end of the file.
* 2) Decrease chunk sizes as we get closer to the end of the file;
* 3) Cache the mapping from the long[] name to its String, to avoid
* `ByteBuffer.allocate` and creating new UTF-8 strings. This was not
* profiled, so it is only a guess that the map is a bit faster; since it
* runs outside the main loop, the difference should be small anyway;
* 4) Exit earlier from loop if a new entry was created.
*/
public class CalculateAverage_tivrfoa {
private static final String FILE = "./measurements.txt";
private static final int MIN_TEMP = -999;
private static final int MAX_TEMP = 999;

private static final int MAX_CITIES = 10_000;
private static final HashMap<Key, String> mapToCityName = new HashMap<>(MAX_CITIES);

// Holding the current result for a single city.
private static class Result {
Expand All @@ -63,9 +69,11 @@ private static class Result {
short min, max;
long sum;

private Result() {
this.min = MAX_TEMP;
this.max = MIN_TEMP;
/**
 * Creates a result seeded with its first measurement: min, max and sum all
 * start at that value and the count starts at one.
 */
private Result(short number) {
    min = max = number;
    sum = number;
    count = 1;
}

public String toString() {
Expand All @@ -88,14 +96,50 @@ private void add(Result other) {
count += other.count;
}

/**
 * Folds a single measurement into this result: widens the min/max range
 * when the value falls outside it, adds the value to the running sum and
 * increments the count.
 */
private void add(short number) {
    min = (short) Math.min(min, number);
    max = (short) Math.max(max, number);
    sum += number;
    count += 1;
}

public String calcName() {
var key = new Key(name);
var cityName = mapToCityName.get(key);
if (cityName != null) {
return cityName;
}
ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder());
bb.asLongBuffer().put(name);
byte[] array = bb.array();
int i = 0;
while (array[i++] != ';')
;
return new String(array, 0, i - 1, StandardCharsets.UTF_8);
cityName = new String(array, 0, i - 1, StandardCharsets.UTF_8);
mapToCityName.put(key, cityName);
return cityName;
}
}

/**
 * Hash-map key wrapping a {@code long[]} so that lookups compare the array's
 * contents instead of its identity.
 *
 * <p>The array is stored by reference, not copied; callers must not modify
 * it while the key is in a map, because that would change the hash code.
 */
private static class Key {
    private final long[] name;

    public Key(long[] name) {
        this.name = name;
    }

    /** Returns a content-based hash of the wrapped array. */
    @Override
    public final int hashCode() {
        return Arrays.hashCode(name);
    }

    /**
     * Returns {@code true} only when {@code o} is a {@code Key} whose array
     * has the same length and elements as this one.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        // instanceof also rejects null, so equals(null) and equals(otherType)
        // return false instead of throwing NPE / ClassCastException.
        if (!(o instanceof Key)) {
            return false;
        }
        return Arrays.equals(name, ((Key) o).name);
    }
}

Expand All @@ -106,7 +150,7 @@ public String calcName() {

private static final class SolveChunk extends Thread {
private long chunkStart, chunkEnd;
private Result[] results = new Result[10_000];
private Result[] results = new Result[MAX_CITIES];
private Result[] buckets = new Result[1 << 17];
private int resIdx = 0;

Expand Down Expand Up @@ -141,15 +185,15 @@ private void parseLoop() {
word = mask(word, pos);
hash = word;

int number = scanNumber(scanner);
short number = scanNumber(scanner);
long nextWord = scanner.getLong();
long nextPos = findDelimiter(nextWord);

Result existingResult = buckets[hashToIndex(hash, buckets)];
if (existingResult != null && existingResult.lastNameLong == word) {
existingResult.add(number);
word = nextWord;
pos = nextPos;
record(existingResult, number);
continue;
}

Expand All @@ -169,10 +213,9 @@ private void parseLoop() {

Result existingResult = buckets[hashToIndex(hash, buckets)];
if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
int number = scanNumber(scanner);
existingResult.add(scanNumber(scanner));
word = scanner.getLong();
pos = findDelimiter(word);
record(existingResult, number);
continue;
}
}
Expand All @@ -199,15 +242,16 @@ private void parseLoop() {

// Save length of name for later.
int nameLength = (int) (scanner.pos() - nameAddress);
int number = scanNumber(scanner);
short number = scanNumber(scanner);

// Final calculation for index into hash table.
int tableIndex = hashToIndex(hash, buckets);
outer: while (true) {
Result existingResult = buckets[tableIndex];
if (existingResult == null) {
existingResult = newEntry(buckets, nameAddress, tableIndex, nameLength, scanner);
existingResult = newEntry(buckets, number, nameAddress, tableIndex, nameLength, scanner);
results[resIdx++] = existingResult;
break;
}
// Check for collision.
int i = 0;
Expand All @@ -221,7 +265,7 @@ private void parseLoop() {

int remainingShift = (64 - (nameLength + 1 - i) << 3);
if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) {
record(existingResult, number);
existingResult.add(number);
break;
}
else {
Expand Down Expand Up @@ -294,24 +338,13 @@ private static void spawnWorker() throws IOException {
.transferTo(System.out);
}

private static int scanNumber(Scanner scanPtr) {
private static short scanNumber(Scanner scanPtr) {
scanPtr.add(1);
long numberWord = scanPtr.getLong();
int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
int number = convertIntoNumber(decimalSepPos, numberWord);
scanPtr.add((decimalSepPos >>> 3) + 3);
return number;
}

private static void record(Result existingResult, int number) {
if (number < existingResult.min) {
existingResult.min = (short) number;
}
if (number > existingResult.max) {
existingResult.max = (short) number;
}
existingResult.sum += number;
existingResult.count++;
return (short) number;
}

private static int hashToIndex(long hash, Result[] results) {
Expand Down Expand Up @@ -346,8 +379,8 @@ private static long findDelimiter(long word) {
return tmp;
}

private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) {
Result r = new Result();
private static Result newEntry(Result[] results, short number, long nameAddress, int hash, int nameLength, Scanner scanner) {
var r = new Result(number);
results[hash] = r;
long[] name = new long[(nameLength / Long.BYTES) + 1];
int pos = 0;
Expand Down

0 comments on commit e0ab6f6

Please sign in to comment.