Skip to content

Commit

Permalink
Index sections (#165)
Browse files Browse the repository at this point in the history
* index sections
* faiss index section + inverted text index section

Co-authored-by: Marty Schoch <[email protected]>
Co-authored-by: Abhi Dangeti <[email protected]>
  • Loading branch information
3 people authored Nov 3, 2023
1 parent 0043e6e commit 7945e8e
Show file tree
Hide file tree
Showing 20 changed files with 2,875 additions and 895 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ on:
push:
branches:
- master
- v15.x
- v14.x
- v13.x
- v12.x
Expand All @@ -12,7 +13,7 @@ jobs:
test:
strategy:
matrix:
go-version: [1.18.x, 1.19.x, 1.20.x]
go-version: [1.19.x, 1.20.x, 1.21.x]
platform: [ubuntu-latest, macos-latest]
runs-on: ${{ matrix.platform }}
steps:
Expand Down
44 changes: 26 additions & 18 deletions build.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"github.com/blevesearch/vellum"
)

const Version uint32 = 15

const Version uint32 = 16
const IndexSectionsVersion uint32 = 16
const Type string = "zap"

const fieldNotUninverted = math.MaxUint64
Expand Down Expand Up @@ -98,7 +98,7 @@ func persistSegmentBaseToWriter(sb *SegmentBase, w io.Writer) (int, error) {
return 0, err
}

err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset,
err = persistFooter(sb.numDocs, sb.storedIndexOffset, sb.fieldsIndexOffset, sb.sectionsIndexOffset,
sb.docValueOffset, sb.chunkMode, sb.memCRC, br)
if err != nil {
return 0, err
Expand Down Expand Up @@ -159,25 +159,33 @@ func persistStoredFieldValues(fieldID int,

func InitSegmentBase(mem []byte, memCRC uint32, chunkMode uint32,
fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64,
storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64,
dictLocs []uint64) (*SegmentBase, error) {
storedIndexOffset uint64, dictLocs []uint64,
sectionsIndexOffset uint64) (*SegmentBase, error) {
sb := &SegmentBase{
mem: mem,
memCRC: memCRC,
chunkMode: chunkMode,
fieldsMap: fieldsMap,
fieldsInv: fieldsInv,
numDocs: numDocs,
storedIndexOffset: storedIndexOffset,
fieldsIndexOffset: fieldsIndexOffset,
docValueOffset: docValueOffset,
dictLocs: dictLocs,
fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
mem: mem,
memCRC: memCRC,
chunkMode: chunkMode,
fieldsMap: fieldsMap,
fieldsInv: fieldsInv,
numDocs: numDocs,
storedIndexOffset: storedIndexOffset,
fieldsIndexOffset: sectionsIndexOffset,
sectionsIndexOffset: sectionsIndexOffset,
fieldDvReaders: make([]map[uint16]*docValueReader, len(segmentSections)),
docValueOffset: 0, // docValueOffsets are identified automatically by the section
dictLocs: dictLocs,
fieldFSTs: make(map[uint16]*vellum.FST),
}
sb.updateSize()

err := sb.loadDvReaders()
// load the data/section starting offsets for each field,
// using the sectionsIndexOffset as the starting point.
err := sb.loadFieldsNew()
if err != nil {
return nil, err
}

err = sb.loadDvReaders()
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/cmd/dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/vellum"
zap "github.com/blevesearch/zapx/v15"
zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/cmd/docvalue.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"sort"
"strconv"

zap "github.com/blevesearch/zapx/v15"
zap "github.com/blevesearch/zapx/v16"
"github.com/golang/snappy"
"github.com/spf13/cobra"
)
Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/cmd/explore.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/vellum"
zap "github.com/blevesearch/zapx/v15"
zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/cmd/fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"encoding/binary"
"fmt"

zap "github.com/blevesearch/zapx/v15"
zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"fmt"
"os"

zap "github.com/blevesearch/zapx/v15"
zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

Expand Down
2 changes: 1 addition & 1 deletion cmd/zap/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
package main

import (
"github.com/blevesearch/zapx/v15/cmd/zap/cmd"
"github.com/blevesearch/zapx/v16/cmd/zap/cmd"
)

func main() {
Expand Down
3 changes: 1 addition & 2 deletions docvalues.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
s.incrementBytesRead(offset)
// set the data offset
fdvIter.dvDataLoc = fieldDvLocStart

return fdvIter, nil
}

Expand Down Expand Up @@ -310,7 +309,7 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
continue
}
fieldID := fieldIDPlus1 - 1
if dvIter, exists := s.fieldDvReaders[fieldID]; exists &&
if dvIter, exists := s.fieldDvReaders[sectionInvertedTextIndex][fieldID]; exists &&
dvIter != nil {
dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
}
Expand Down
Loading

0 comments on commit 7945e8e

Please sign in to comment.