Skip to content

Commit

Permalink
Refactored implementation to use explicit switch and unit test update
Browse files Browse the repository at this point in the history
  • Loading branch information
dtspence committed Oct 25, 2024
1 parent 5f20b24 commit 47d1cf3
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 42 deletions.
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
package datawave.ingest.data.config;

import java.util.EnumMap;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;

import org.apache.commons.collections4.map.LRUMap;

import com.google.common.annotations.VisibleForTesting;

public class CachedFieldConfigHelper implements FieldConfigHelper {
private final FieldConfigHelper underlyingHelper;
private final Map<String,ResultEntry> resultCache;
private final Map<String,CachedEntry> resultCache;

enum AttributeType {
INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEXED_ONLY
INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEX_ONLY_FIELD
}

public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) {
Expand All @@ -26,50 +25,96 @@ public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) {

@Override
public boolean isStoredField(String fieldName) {
return getOrEvaluate(AttributeType.STORED_FIELD, fieldName, underlyingHelper::isStoredField);
return getFieldResult(AttributeType.STORED_FIELD, fieldName, underlyingHelper::isStoredField);
}

@Override
public boolean isIndexedField(String fieldName) {
return getOrEvaluate(AttributeType.INDEXED_FIELD, fieldName, underlyingHelper::isIndexedField);
return getFieldResult(AttributeType.INDEXED_FIELD, fieldName, underlyingHelper::isIndexedField);
}

@Override
public boolean isIndexOnlyField(String fieldName) {
return getOrEvaluate(AttributeType.INDEXED_ONLY, fieldName, underlyingHelper::isIndexOnlyField);
return getFieldResult(AttributeType.INDEX_ONLY_FIELD, fieldName, underlyingHelper::isIndexOnlyField);
}

@Override
public boolean isReverseIndexedField(String fieldName) {
return getOrEvaluate(AttributeType.REVERSE_INDEXED_FIELD, fieldName, underlyingHelper::isReverseIndexedField);
return getFieldResult(AttributeType.REVERSE_INDEXED_FIELD, fieldName, underlyingHelper::isReverseIndexedField);
}

@Override
public boolean isTokenizedField(String fieldName) {
return getOrEvaluate(AttributeType.TOKENIZED_FIELD, fieldName, underlyingHelper::isTokenizedField);
return getFieldResult(AttributeType.TOKENIZED_FIELD, fieldName, underlyingHelper::isTokenizedField);
}

@Override
public boolean isReverseTokenizedField(String fieldName) {
return getOrEvaluate(AttributeType.REVERSE_TOKENIZED_FIELD, fieldName, underlyingHelper::isReverseTokenizedField);
return getFieldResult(AttributeType.REVERSE_TOKENIZED_FIELD, fieldName, underlyingHelper::isReverseTokenizedField);
}

@VisibleForTesting
boolean getOrEvaluate(AttributeType attributeType, String fieldName, Function<String,Boolean> evaluateFn) {
return resultCache.computeIfAbsent(fieldName, ResultEntry::new).resolveResult(attributeType, evaluateFn);
boolean getFieldResult(AttributeType attributeType, String fieldName, Predicate<String> fn) {
return resultCache.computeIfAbsent(fieldName, CachedEntry::new).get(attributeType).getResultOrEvaluate(fn);
}

private static class ResultEntry {
private static class CachedEntry {
private final String fieldName;
private final EnumMap<AttributeType,Boolean> resultMap;

ResultEntry(String fieldName) {
private final MemoizedResult indexed;
private final MemoizedResult reverseIndexed;
private final MemoizedResult stored;
private final MemoizedResult indexedOnly;
private final MemoizedResult tokenized;
private final MemoizedResult reverseTokenized;

private CachedEntry(String fieldName) {
this.fieldName = fieldName;
this.resultMap = new EnumMap<>(AttributeType.class);
this.indexed = new MemoizedResult();
this.reverseIndexed = new MemoizedResult();
this.stored = new MemoizedResult();
this.indexedOnly = new MemoizedResult();
this.tokenized = new MemoizedResult();
this.reverseTokenized = new MemoizedResult();
}

private MemoizedResult get(AttributeType attributeType) {
MemoizedResult result;
switch (attributeType) {
case INDEX_ONLY_FIELD:
result = indexedOnly;
break;
case INDEXED_FIELD:
result = indexed;
break;
case REVERSE_INDEXED_FIELD:
result = reverseIndexed;
break;
case TOKENIZED_FIELD:
result = tokenized;
break;
case REVERSE_TOKENIZED_FIELD:
result = reverseTokenized;
break;
case STORED_FIELD:
result = stored;
break;
default:
throw new IllegalArgumentException("Undefined attribute type: " + attributeType);
}
return result;
}

boolean resolveResult(AttributeType attributeType, Function<String,Boolean> evaluateFn) {
return resultMap.computeIfAbsent(attributeType, (t) -> evaluateFn.apply(fieldName));
private class MemoizedResult {
private boolean resultEvaluated;
private boolean result;

private boolean getResultOrEvaluate(Predicate<String> evaluateFn) {
if (!resultEvaluated) {
result = evaluateFn.test(fieldName);
resultEvaluated = true;
}
return result;
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
package datawave.ingest.data.config;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.ValueSource;

public class CachingFieldConfigHelperTest {
Expand Down Expand Up @@ -45,45 +47,63 @@ public void testConstructorWithNonPositiveLimitWillThrow(int limit) {
assertThrows(IllegalArgumentException.class, () -> new CachedFieldConfigHelper(mock(FieldConfigHelper.class), limit));
}

@SuppressWarnings("ClassEscapesDefinedScope")
@ParameterizedTest
@EnumSource(CachedFieldConfigHelper.AttributeType.class)
public void testAttributeTypesDoNotThrow(CachedFieldConfigHelper.AttributeType attributeType) {
String fieldName = "test";
FieldConfigHelper mockHelper = mock(FieldConfigHelper.class);
CachedFieldConfigHelper cachedHelper = new CachedFieldConfigHelper(mockHelper, 1);
cachedHelper.getFieldResult(attributeType, fieldName, (f) -> true);
}

@Test
public void testCachingLimitsBetweenFieldsAndAttributeTypes() {
AtomicLong counter = new AtomicLong();
CachedFieldConfigHelper helper = new CachedFieldConfigHelper(mock(FieldConfigHelper.class), 2);
Function<String,Boolean> fn = (f) -> {
counter.incrementAndGet();
AtomicLong storedCounter = new AtomicLong();
AtomicLong indexCounter = new AtomicLong();
FieldConfigHelper innerHelper = mock(FieldConfigHelper.class);
CachedFieldConfigHelper helper = new CachedFieldConfigHelper(innerHelper, 2);

when(innerHelper.isStoredField(any())).then((a) -> {
storedCounter.incrementAndGet();
return true;
});

when(innerHelper.isIndexedField(any())).then((a) -> {
indexCounter.incrementAndGet();
return true;
};
});

// following ensures that:
// 1. fields are computed, where appropriate per attribute-type
// 2. limit allows cache results to return
// 3. limit blocks results to return if exceeded
// 4. limit functions across attribute-types

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn);
Assertions.assertEquals(1, counter.get(), "field1 should compute result (new field)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField);
assertEquals(1, storedCounter.get(), "field1 should compute result (new field)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn);
Assertions.assertEquals(1, counter.get(), "field1 repeated (existing field)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField);
assertEquals(1, storedCounter.get(), "field1 repeated (existing field)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn);
Assertions.assertEquals(2, counter.get(), "field2 should compute result (new field)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField);
assertEquals(2, storedCounter.get(), "field2 should compute result (new field)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn);
Assertions.assertEquals(2, counter.get(), "field2 repeated (existing)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField);
assertEquals(2, storedCounter.get(), "field2 repeated (existing)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.INDEXED_FIELD, "field1", fn);
Assertions.assertEquals(3, counter.get(), "field1 should compute result (new attribute)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.INDEXED_FIELD, "field1", innerHelper::isIndexedField);
assertEquals(1, indexCounter.get(), "field1 should compute result (new attribute)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn);
Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (new field)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", innerHelper::isStoredField);
assertEquals(3, storedCounter.get(), "field3 exceeded limit (new field)");

helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn);
Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (existing field)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", innerHelper::isStoredField);
assertEquals(3, storedCounter.get(), "field3 exceeded limit (existing field)");

// LRU map should evict field #2
// we access field #1 above which has more accesses over field #2
helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn);
Assertions.assertEquals(5, counter.get(), "field1 exceeded limit (new field/eviction)");
helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField);
assertEquals(4, storedCounter.get(), "field1 exceeded limit (new field/eviction)");
}
}

0 comments on commit 47d1cf3

Please sign in to comment.