Update to lucene 10.1.0 (#791)
* Update to lucene 10.0.0

* Update to lucene 10.1.0
aprudhomme authored Jan 8, 2025
1 parent 1c2d664 commit e8772e1
Showing 49 changed files with 442 additions and 555 deletions.
4 changes: 2 additions & 2 deletions gradle/libs.versions.toml
@@ -3,7 +3,7 @@ aws = "1.12.768"
grpc = "1.66.0"
jackson = "2.17.2"
log4j = "2.23.1"
lucene = "9.12.0"
lucene = "10.1.0"
prometheus = "1.3.1"
protobuf = "3.25.3"

@@ -59,7 +59,7 @@ grpc-inprocess = { module = "io.grpc:grpc-inprocess", version.ref = "grpc" }
grpc-testing = { module = "io.grpc:grpc-testing", version.ref = "grpc" }
junit = { module = "junit:junit", version = "4.13.2" }
lucene-test-framework = { module = "org.apache.lucene:lucene-test-framework", version.ref = "lucene" }
-mockito-core = { module = "org.mockito:mockito-core", version = "5.12.0" }
+mockito-core = { module = "org.mockito:mockito-core", version = "5.14.2" }
s3mock = { module = "io.findify:s3mock_2.13", version = "0.2.6" }
spatial4j = { module = "org.locationtech.spatial4j:spatial4j", version = "0.8" }

ServerCodec.java
@@ -24,10 +24,10 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene101.Lucene101Codec;

/** Implements per-index {@link Codec}. */
-public class ServerCodec extends Lucene912Codec {
+public class ServerCodec extends Lucene101Codec {
private final IndexStateManager stateManager;

// nocommit expose compression control
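Lucene names its default codec after the index-format version, so the Lucene 10.1 default lives in org.apache.lucene.codecs.lucene101 and ServerCodec simply re-parents from Lucene912Codec to Lucene101Codec. A minimal sketch of the per-field override pattern this class builds on, assuming only the stock Lucene 10.1 API (the class and field names below are illustrative, not nrtsearch's actual wiring):

```java
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.search.suggest.document.Completion101PostingsFormat;
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat;

// Illustrative subclass: ServerCodec resolves per-field formats from its
// IndexStateManager; the hard-coded field name here is a placeholder.
public class ExampleCodec extends Lucene101Codec {
  private final PostingsFormat suggestFormat =
      new Completion101PostingsFormat(CompletionPostingsFormat.FSTLoadMode.ON_HEAP);

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    if ("suggest_field".equals(field)) {
      return suggestFormat; // completion fields need the suggest postings format
    }
    return super.getPostingsFormatForField(field); // Lucene 10.1 defaults
  }
}
```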
24 changes: 14 additions & 10 deletions src/main/java/com/yelp/nrtsearch/server/doc/LoadedDocValues.java
@@ -1075,20 +1075,22 @@ public ByteVectorType getValue() {
*/
public static final class SingleSearchVector extends LoadedDocValues<FloatVectorType> {
private final FloatVectorValues vectorValues;
+    private final KnnVectorValues.DocIndexIterator vectorIterator;
private FloatVectorType value = null;

public SingleSearchVector(FloatVectorValues vectorValues) {
this.vectorValues = vectorValues;
+      this.vectorIterator = vectorValues != null ? vectorValues.iterator() : null;
}

@Override
public void setDocId(int docID) throws IOException {
-      if (vectorValues != null) {
-        if (vectorValues.docID() < docID) {
-          vectorValues.advance(docID);
+      if (vectorIterator != null) {
+        if (vectorIterator.docID() < docID) {
+          vectorIterator.advance(docID);
}
-        if (vectorValues.docID() == docID) {
-          value = new FloatVectorType(vectorValues.vectorValue());
+        if (vectorIterator.docID() == docID) {
+          value = new FloatVectorType(vectorValues.vectorValue(vectorIterator.index()));
} else {
value = null;
}
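This is the most invasive API change in the upgrade: in Lucene 10, FloatVectorValues and ByteVectorValues no longer act as their own doc-ID iterators. Iteration state moves to a KnnVectorValues.DocIndexIterator, and vectors are fetched by the iterator's ordinal (index()) rather than by doc ID, which is exactly what the setDocId changes above implement. A hedged sketch of the new read loop (the helper name is illustrative):

```java
import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;

// Visit every document that has a vector and read its value.
static void visitVectors(FloatVectorValues vectorValues) throws IOException {
  KnnVectorValues.DocIndexIterator it = vectorValues.iterator();
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    // vectorValue() now takes the vector ordinal, not the doc ID.
    float[] vector = vectorValues.vectorValue(it.index());
    // ... use doc and vector ...
  }
}
```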
@@ -1133,20 +1135,22 @@ public FloatVectorType getValue() {
*/
public static final class SingleSearchByteVector extends LoadedDocValues<ByteVectorType> {
private final ByteVectorValues vectorValues;
+    private final KnnVectorValues.DocIndexIterator vectorIterator;
private ByteVectorType value = null;

public SingleSearchByteVector(ByteVectorValues vectorValues) {
this.vectorValues = vectorValues;
+      this.vectorIterator = vectorValues != null ? vectorValues.iterator() : null;
}

@Override
public void setDocId(int docID) throws IOException {
-      if (vectorValues != null) {
-        if (vectorValues.docID() < docID) {
-          vectorValues.advance(docID);
+      if (vectorIterator != null) {
+        if (vectorIterator.docID() < docID) {
+          vectorIterator.advance(docID);
}
-        if (vectorValues.docID() == docID) {
-          value = new ByteVectorType(vectorValues.vectorValue());
+        if (vectorIterator.docID() == docID) {
+          value = new ByteVectorType(vectorValues.vectorValue(vectorIterator.index()));
} else {
value = null;
}
@@ -191,8 +191,8 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getScriptFacetResult(
int totalDocs = 0;
// run script against all match docs, and aggregate counts
for (MatchingDocs matchingDocs : drillDowns.getMatchingDocs()) {
-      FacetScript script = segmentFactory.newInstance(matchingDocs.context);
-      DocIdSetIterator iterator = matchingDocs.bits.iterator();
+      FacetScript script = segmentFactory.newInstance(matchingDocs.context());
+      DocIdSetIterator iterator = matchingDocs.bits().iterator();
if (iterator == null) {
continue;
}
@@ -216,8 +216,8 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getDocValuesFacetResul
int totalDocs = 0;
// get doc values for all match docs, and aggregate counts
for (MatchingDocs matchingDocs : drillDowns.getMatchingDocs()) {
-      LoadedDocValues<?> docValues = fieldDef.getDocValues(matchingDocs.context);
-      DocIdSetIterator iterator = matchingDocs.bits.iterator();
+      LoadedDocValues<?> docValues = fieldDef.getDocValues(matchingDocs.context());
+      DocIdSetIterator iterator = matchingDocs.bits().iterator();
if (iterator == null) {
continue;
}
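A recurring theme in this commit: Lucene 10 converted several carrier classes to records, FacetsCollector.MatchingDocs among them, so field reads like matchingDocs.bits become accessor calls like matchingDocs.bits(). A small sketch of the updated access pattern (the counting loop itself is illustrative):

```java
import java.io.IOException;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.search.DocIdSetIterator;

// Count the hits in one segment via the Lucene 10 record accessors.
static int countHits(MatchingDocs matchingDocs) throws IOException {
  DocIdSetIterator iterator = matchingDocs.bits().iterator(); // was .bits
  if (iterator == null) {
    return 0; // no matching docs in this segment
  }
  int count = 0;
  while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    count++;
  }
  return count;
}
```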
@@ -462,7 +462,7 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(
luceneFacets =
new FastTaxonomyFacetCounts(
indexFieldName,
-              searcherAndTaxonomyManager.taxonomyReader,
+              searcherAndTaxonomyManager.taxonomyReader(),
indexState.getFacetsConfig(),
c);
} else {
@@ -478,7 +478,7 @@ private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(
luceneFacets =
new FastTaxonomyFacetCounts(
indexFieldName,
-              searcherAndTaxonomyManager.taxonomyReader,
+              searcherAndTaxonomyManager.taxonomyReader(),
indexState.getFacetsConfig(),
drillDowns);
indexFieldNameToFacets.put(indexFieldName, luceneFacets);
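SearcherTaxonomyManager.SearcherAndTaxonomy got the same record treatment, so searcher and taxonomyReader are now accessor methods. A sketch of the acquire/use/release cycle under the Lucene 10 API (the facet-building body is elided):

```java
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager.SearcherAndTaxonomy;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.IndexSearcher;

// Acquire a searcher/taxonomy pair and read both through record accessors.
static void withSearcherAndTaxonomy(SearcherTaxonomyManager manager) throws IOException {
  SearcherAndTaxonomy sat = manager.acquire();
  try {
    IndexSearcher searcher = sat.searcher();          // was sat.searcher
    TaxonomyReader taxoReader = sat.taxonomyReader(); // was sat.taxonomyReader
    // ... run the query and build FastTaxonomyFacetCounts here ...
  } finally {
    manager.release(sat);
  }
}
```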
FilteredSSDVFacetCounts.java
@@ -58,7 +58,7 @@ public class FilteredSSDVFacetCounts extends Facets {
final String field;
final Map<Long, Integer> globalOrdinalToCountIndex;
final List<String> values;
-  final int[] counts;
+  int[] counts;

/**
* Facet to count based on sorted set doc values, but only considering the provided values.
@@ -76,7 +76,6 @@ public FilteredSSDVFacetCounts(
this.field = state.getField();
this.values = values;
dv = state.getDocValues();
-    counts = new int[values.size()];

// find mapping to go from global ordinal to the value count index
globalOrdinalToCountIndex = new HashMap<>();
@@ -98,6 +97,12 @@ public FilteredSSDVFacetCounts(
}
}

+  private void initializeCounts() {
+    if (counts == null) {
+      counts = new int[values.size()];
+    }
+  }

/** Does all the "real work" of tallying up the counts. */
private void count(List<MatchingDocs> matchingDocs) throws IOException {
OrdinalMap ordinalMap;
@@ -117,12 +122,12 @@ private void count(List<MatchingDocs> matchingDocs) throws IOException {
// the top-level reader passed to the
// SortedSetDocValuesReaderState, else cryptic
// AIOOBE can happen:
-      if (ReaderUtil.getTopLevelContext(hits.context).reader() != reader) {
+      if (ReaderUtil.getTopLevelContext(hits.context()).reader() != reader) {
throw new IllegalStateException(
"the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
}

-      countOneSegment(ordinalMap, hits.context.reader(), hits.context.ord, hits, null);
+      countOneSegment(ordinalMap, hits.context().reader(), hits.context().ord, hits, null);
}
}

@@ -135,6 +140,9 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
return;
}

+    // Initialize counts:
+    initializeCounts();

// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
@@ -177,7 +185,7 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
if (count != 0) {
Integer countIndex = globalOrdinalToCountIndex.get(ordMap.get(ord));
if (countIndex != null) {
-          counts[countIndex]++;
+          counts[countIndex] += count;
}
}
}
@@ -212,12 +220,19 @@ private void countOneSegmentNHLD(OrdinalMap ordinalMap, LeafReader reader, int s
private void countOneSegment(
OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits, Bits liveDocs)
throws IOException {
+    if (hits != null && hits.totalHits() == 0) {
+      return;
+    }

SortedSetDocValues multiValues = DocValues.getSortedSet(reader, field);
if (multiValues == null) {
// nothing to count
return;
}

+    // Initialize counts:
+    initializeCounts();

// It's slightly more efficient to work against SortedDocValues if the field is actually
// single-valued (see: LUCENE-5309)
SortedDocValues singleValues = DocValues.unwrapSingleton(multiValues);
Expand All @@ -229,7 +244,7 @@ private void countOneSegment(
it = FacetUtils.liveDocsDISI(valuesIt, liveDocs);
;
} else {
-      it = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
+      it = ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits().iterator(), valuesIt));
}

// TODO: yet another option is to count all segs
@@ -246,7 +261,7 @@

int numSegOrds = (int) multiValues.getValueCount();

-    if (hits != null && hits.totalHits < numSegOrds / 10) {
+    if (hits != null && hits.totalHits() < numSegOrds / 10) {
// Remap every ord to global ord as we iterate:
if (singleValues != null) {
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
@@ -288,7 +303,7 @@
if (count != 0) {
Integer countIndex = globalOrdinalToCountIndex.get(ordMap.get(ord));
if (countIndex != null) {
-            counts[countIndex]++;
+            counts[countIndex] += count;
}
}
}
@@ -337,8 +352,7 @@ private void countAll() throws IOException {
if (liveDocs == null) {
countOneSegmentNHLD(ordinalMap, context.reader(), context.ord);
} else {
-        countOneSegment(
-            ordinalMap, context.reader(), context.ord, null, context.reader().getLiveDocs());
+        countOneSegment(ordinalMap, context.reader(), context.ord, null, liveDocs);
}
}
}
@@ -366,14 +380,14 @@ private FacetResult getDim(String dim, int topN) throws IOException {
int dimCount = 0;
int childCount = 0;

-    TopOrdAndIntQueue.OrdAndValue reuse = null;
+    TopOrdAndIntQueue.OrdAndInt reuse = null;
for (int ord = 0; ord < counts.length; ord++) {
if (counts[ord] > 0) {
dimCount += counts[ord];
childCount++;
if (counts[ord] > bottomCount) {
if (reuse == null) {
-            reuse = new TopOrdAndIntQueue.OrdAndValue();
+            reuse = new TopOrdAndIntQueue.OrdAndInt();
}
reuse.ord = ord;
reuse.value = counts[ord];
@@ -382,9 +396,9 @@
// sparse case unnecessarily
q = new TopOrdAndIntQueue(topN);
}
-          reuse = q.insertWithOverflow(reuse);
+          reuse = (TopOrdAndIntQueue.OrdAndInt) q.insertWithOverflow(reuse);
if (q.size() == topN) {
-            bottomCount = q.top().value;
+            bottomCount = ((TopOrdAndIntQueue.OrdAndInt) q.top()).value;
}
}
}
@@ -396,7 +410,7 @@

LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for (int i = labelValues.length - 1; i >= 0; i--) {
-      TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
+      TopOrdAndIntQueue.OrdAndInt ordAndValue = (TopOrdAndIntQueue.OrdAndInt) q.pop();
labelValues[i] = new LabelAndValue(values.get(ordAndValue.ord), ordAndValue.value);
}
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
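Lucene 10 also reworked the facet priority queues: TopOrdAndIntQueue now extends a generic TopOrdAndNumberQueue, and its entries are the concrete OrdAndInt subclass, hence the casts in getDim above. A brief sketch of offering one (ordinal, count) pair under the new types (the helper is illustrative):

```java
import org.apache.lucene.facet.TopOrdAndIntQueue;

// Offer one (ord, count) entry; insertWithOverflow hands back the evicted
// entry (or null) typed as the generic OrdAndValue, so cast it to reuse it.
static TopOrdAndIntQueue.OrdAndInt offer(
    TopOrdAndIntQueue queue, TopOrdAndIntQueue.OrdAndInt reuse, int ord, int count) {
  if (reuse == null) {
    reuse = new TopOrdAndIntQueue.OrdAndInt();
  }
  reuse.ord = ord;
  reuse.value = count;
  return (TopOrdAndIntQueue.OrdAndInt) queue.insertWithOverflow(reuse);
}
```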
ContextSuggestFieldDef.java
@@ -25,7 +25,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
-import org.apache.lucene.search.suggest.document.Completion912PostingsFormat;
+import org.apache.lucene.search.suggest.document.Completion101PostingsFormat;
import org.apache.lucene.search.suggest.document.ContextSuggestField;

public class ContextSuggestFieldDef extends IndexableFieldDef<Void> {
@@ -45,7 +45,7 @@ protected ContextSuggestFieldDef(
this.indexAnalyzer = this.parseIndexAnalyzer(requestField);
this.searchAnalyzer = this.parseSearchAnalyzer(requestField);
this.postingsFormat =
-        new Completion912PostingsFormat(context.config().getCompletionCodecLoadMode());
+        new Completion101PostingsFormat(context.config().getCompletionCodecLoadMode());
}

@Override
@@ -86,7 +86,7 @@ private CreateSnapshotResponse createSnapshot(
// search is done:
long t0 = System.nanoTime();
IndexReader r =
-        DirectoryReader.openIfChanged((DirectoryReader) s2.searcher.getIndexReader(), c);
+        DirectoryReader.openIfChanged((DirectoryReader) s2.searcher().getIndexReader(), c);
IndexSearcher s = new IndexSearcher(r);
try {
shardState.slm.record(s);
@@ -88,22 +88,22 @@ private CopyState handle(IndexState indexState, CopyStateRequest copyStateReques
private static CopyState writeCopyState(org.apache.lucene.replicator.nrt.CopyState state)
throws IOException {
CopyState.Builder builder = CopyState.newBuilder();
-    builder.setInfoBytesLength(state.infosBytes.length);
-    builder.setInfoBytes(ByteString.copyFrom(state.infosBytes, 0, state.infosBytes.length));
+    builder.setInfoBytesLength(state.infosBytes().length);
+    builder.setInfoBytes(ByteString.copyFrom(state.infosBytes(), 0, state.infosBytes().length));

-    builder.setGen(state.gen);
-    builder.setVersion(state.version);
+    builder.setGen(state.gen());
+    builder.setVersion(state.version());

-    FilesMetadata filesMetadata = writeFilesMetaData(state.files);
+    FilesMetadata filesMetadata = writeFilesMetaData(state.files());
builder.setFilesMetadata(filesMetadata);

-    builder.setCompletedMergeFilesSize(state.completedMergeFiles.size());
+    builder.setCompletedMergeFilesSize(state.completedMergeFiles().size());

-    for (String fileName : state.completedMergeFiles) {
+    for (String fileName : state.completedMergeFiles()) {
builder.addCompletedMergeFiles(fileName);
}

-    builder.setPrimaryGen(state.primaryGen);
+    builder.setPrimaryGen(state.primaryGen());

return builder.build();
}
@@ -117,12 +117,12 @@ public static FilesMetadata writeFilesMetaData(Map<String, FileMetaData> files)
fileMetadataBuilder.setFileName(ent.getKey());

FileMetaData fmd = ent.getValue();
-      fileMetadataBuilder.setLen(fmd.length);
-      fileMetadataBuilder.setChecksum(fmd.checksum);
-      fileMetadataBuilder.setHeaderLength(fmd.header.length);
-      fileMetadataBuilder.setHeader(ByteString.copyFrom(fmd.header, 0, fmd.header.length));
-      fileMetadataBuilder.setFooterLength(fmd.footer.length);
-      fileMetadataBuilder.setFooter(ByteString.copyFrom(fmd.footer, 0, fmd.footer.length));
+      fileMetadataBuilder.setLen(fmd.length());
+      fileMetadataBuilder.setChecksum(fmd.checksum());
+      fileMetadataBuilder.setHeaderLength(fmd.header().length);
+      fileMetadataBuilder.setHeader(ByteString.copyFrom(fmd.header(), 0, fmd.header().length));
+      fileMetadataBuilder.setFooterLength(fmd.footer().length);
+      fileMetadataBuilder.setFooter(ByteString.copyFrom(fmd.footer(), 0, fmd.footer().length));
builder.addFileMetadata(fileMetadataBuilder.build());
}
return builder.build();
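The NRT-replication value classes, org.apache.lucene.replicator.nrt.CopyState and FileMetaData, are records in Lucene 10 as well; this hunk only swaps field access for accessor calls. A tiny sketch of reading a FileMetaData with the accessor forms (the print helper is illustrative):

```java
import org.apache.lucene.replicator.nrt.FileMetaData;

// Read the record components that used to be public fields.
static void describe(FileMetaData fmd) {
  long length = fmd.length();     // was fmd.length
  long checksum = fmd.checksum(); // was fmd.checksum
  byte[] header = fmd.header();   // was fmd.header
  byte[] footer = fmd.footer();   // was fmd.footer
  System.out.printf(
      "len=%d checksum=%d header=%d footer=%d%n",
      length, checksum, header.length, footer.length);
}
```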