
Commit

code cleanup
Thejas-bhat committed Nov 20, 2023
1 parent f64db35 commit f4a96c2
Showing 11 changed files with 81 additions and 96 deletions.
96 changes: 52 additions & 44 deletions cmd/zap/cmd/docvalue.go
@@ -186,6 +186,51 @@ func dumpDocValueResults(data []byte, args []string, field string, id int, field
return nil
}

func docValueCmd(cmd *cobra.Command, args []string) error {
if len(args) < 1 {
return fmt.Errorf("must specify index file path")
}

data := segment.Data()
// iterate through fields index
pos := segment.FieldsIndexOffset()
if pos == 0 {
// this is the case only for older file formats
return fmt.Errorf("file format not supported")
}

fieldInv, fieldSectionMap, err := getSectionFields(data, pos)
if err != nil {
return fmt.Errorf("error while getting fields and sections info %v", err)
}
// if no fields are specified then print the docValue offsets for all fields set
for id, field := range fieldInv {
fieldStartLoc, fieldEndLoc, err := getDvOffsets(data, id, fieldSectionMap)
if err != nil {
return err
}

if fieldStartLoc == math.MaxUint64 && len(args) == 1 {
fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) not "+
" persisted \n", id, field, fieldStartLoc, fieldStartLoc)
continue
}
err = dumpDocValueResults(data, args, field, id, fieldEndLoc, fieldStartLoc)
if err != nil {
return err
}
}
return nil
}

// docvalueCmd represents the docvalue command
var docvalueCmd = &cobra.Command{
Use: "docvalue [path] <field> optional <docNum> optional",
Short: "docvalue prints the docvalue details by field, and docNum",
Long: `The docvalue command lets you explore the docValues in order of field and by doc number.`,
RunE: docValueCmd,
}
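
The docvalueCmd declared above is wired into the CLI the usual cobra way. The sketch below is illustrative only, with stand-in names rather than the real cmd/zap wiring, and shows how such a subcommand is registered and how its argument check behaves:

package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

// A stripped-down stand-in for the zap CLI: a root command with a
// docvalue-style subcommand attached. Run as:
//   zap docvalue <index file path> [field] [docNum]
func main() {
	rootCmd := &cobra.Command{Use: "zap"}
	docvalueCmd := &cobra.Command{
		Use:   "docvalue [path] <field> optional <docNum> optional",
		Short: "docvalue prints the docvalue details by field, and docNum",
		RunE: func(cmd *cobra.Command, args []string) error {
			if len(args) < 1 {
				return fmt.Errorf("must specify index file path")
			}
			// The real command would open the segment and dump docvalue
			// offsets here; this stub only echoes the path.
			fmt.Printf("would dump docvalues from %s\n", args[0])
			return nil
		},
	}
	rootCmd.AddCommand(docvalueCmd)

	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}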

func loadFieldData(data []byte, pos uint64, fieldID uint64, sectionMap map[uint16]uint64, fieldsInv []string) ([]string, error) {
fieldNameLen, sz := binary.Uvarint(data[pos : pos+binary.MaxVarintLen64])
pos += uint64(sz)
@@ -209,64 +254,35 @@ func loadFieldData(data []byte, pos uint64, fieldID uint64, sectionMap map[uint1
return fieldsInv, nil
}

func docValueCmd(cmd *cobra.Command, args []string) error {
if len(args) < 1 {
return fmt.Errorf("must specify index file path")
}

data := segment.Data()
log.Printf("hello\n")
// iterate through fields index
func getSectionFields(data []byte, pos uint64) ([]string, []map[uint16]uint64, error) {
var fieldID uint64
var err error
var fieldSectionMap []map[uint16]uint64
var fieldInv []string
pos := segment.FieldsIndexOffset()
if pos == 0 {
// this is the case only for older file formats
return fmt.Errorf("file format not supported?")
}

// read the number of fields
numFields, sz := binary.Uvarint(data[pos : pos+binary.MaxVarintLen64])
pos += uint64(sz)
var fieldID uint64
var err error
var fieldSectionMap []map[uint16]uint64

for fieldID < numFields {
sectionMap := make(map[uint16]uint64)
addr := binary.BigEndian.Uint64(data[pos : pos+8])

fieldInv, err = loadFieldData(data, addr, fieldID, sectionMap, fieldInv)
if err != nil {
return err
return nil, nil, err
}
fieldSectionMap = append(fieldSectionMap, sectionMap)
fieldID++
pos += 8
}

// if no fields are specified then print the docValue offsets for all fields set
for id, field := range fieldInv {
fieldStartLoc, fieldEndLoc, err := getDvOffsets(data, id, fieldSectionMap)
if err != nil {
return err
}

if fieldStartLoc == math.MaxUint64 && len(args) == 1 {
fmt.Printf("FieldID: %d '%s' docvalue at %d (%x) not "+
" persisted \n", id, field, fieldStartLoc, fieldStartLoc)
continue
}
err = dumpDocValueResults(data, args, field, id, fieldEndLoc, fieldStartLoc)
if err != nil {
return err
}
}
return nil
return fieldInv, fieldSectionMap, nil
}

func getDvOffsets(data []byte, id int, fieldSectionMap []map[uint16]uint64) (uint64, uint64, error) {
var read uint64
pos := fieldSectionMap[id][sectionInvertedTextIndex]
pos := fieldSectionMap[id][zap.SectionInvertedTextIndex]
fieldStartLoc, n := binary.Uvarint(
data[pos : pos+binary.MaxVarintLen64])
if n <= 0 {
@@ -284,14 +300,6 @@ func getDvOffsets(data []byte, id int, fieldSectionMap []map[uint16]uint64) (uin
return fieldStartLoc, fieldEndLoc, nil
}

// docvalueCmd represents the docvalue command
var docvalueCmd = &cobra.Command{
Use: "docvalue [path] <field> optional <docNum> optional",
Short: "docvalue prints the docvalue details by field, and docNum",
Long: `The docvalue command lets you explore the docValues in order of field and by doc number.`,
RunE: docValueCmd,
}

func getDocValueLocs(docNum uint64, metaHeader []zap.MetaData) (uint64, uint64) {
i := sort.Search(len(metaHeader), func(i int) bool {
return metaHeader[i].DocNum >= docNum
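getDocValueLocs above relies on sort.Search to binary-search the per-field metadata entries, which are sorted by DocNum. The following is a minimal, self-contained sketch of that lookup pattern; metaEntry mirrors only the DocNum field of zap.MetaData (the real type also carries offset information used by the command):

package main

import (
	"fmt"
	"sort"
)

// metaEntry mirrors just the DocNum field of zap.MetaData for illustration.
type metaEntry struct {
	DocNum uint64
}

func main() {
	metaHeader := []metaEntry{{DocNum: 2}, {DocNum: 5}, {DocNum: 9}}
	docNum := uint64(5)

	// sort.Search returns the smallest index i for which the predicate is
	// true, i.e. the first entry with DocNum >= docNum; a result equal to
	// len(metaHeader) means no such entry exists.
	i := sort.Search(len(metaHeader), func(i int) bool {
		return metaHeader[i].DocNum >= docNum
	})
	if i < len(metaHeader) && metaHeader[i].DocNum == docNum {
		fmt.Printf("docNum %d has doc values recorded at metaHeader[%d]\n", docNum, i)
	} else {
		fmt.Printf("docNum %d has no persisted doc values\n", docNum)
	}
}
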
33 changes: 7 additions & 26 deletions cmd/zap/cmd/fields.go
@@ -15,9 +15,9 @@
package cmd

import (
"encoding/binary"
"fmt"

zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

@@ -32,42 +32,23 @@ var fieldsCmd = &cobra.Command{
}

data := segment.Data()

// iterate through fields index
var fieldInv []string
pos := segment.FieldsIndexOffset()
if pos == 0 {
// this is the case only for older file formats
return fmt.Errorf("file format not supported?")
return fmt.Errorf("file format not supported")
}

// read the number of fields
numFields, sz := binary.Uvarint(data[pos : pos+binary.MaxVarintLen64])
pos += uint64(sz)
var fieldID uint64
var err error
var fieldSectionMap []map[uint16]uint64

for fieldID < numFields {
sectionMap := make(map[uint16]uint64)
addr := binary.BigEndian.Uint64(data[pos : pos+8])

fieldInv, err = loadFieldData(data, addr, fieldID, sectionMap, fieldInv)
if err != nil {
return err
}
fieldSectionMap = append(fieldSectionMap, sectionMap)
fieldID++
pos += 8
fieldInv, fieldSectionMap, err := getSectionFields(data, pos)
if err != nil {
return fmt.Errorf("error while getting the sections and field info %v", err)
}

for fieldID, field := range fieldInv {
for sectionType, sectionAddr := range fieldSectionMap[fieldID] {
if sectionAddr > 0 {
switch sectionType {
case sectionInvertedTextIndex:
case zap.SectionInvertedTextIndex:
fmt.Printf("field %d '%s' text index starts at %d (%x)\n", fieldID, field, sectionAddr, sectionAddr)
case sectionFaissVectorIndex:
case zap.SectionFaissVectorIndex:
fmt.Printf("field %d '%s' vector index starts at %d (%x)\n", fieldID, field, sectionAddr, sectionAddr)
}
}
5 changes: 0 additions & 5 deletions cmd/zap/cmd/root.go
@@ -24,11 +24,6 @@ import (

var segment *zap.Segment

const (
sectionInvertedTextIndex = iota
sectionFaissVectorIndex
)

// RootCmd represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
Use: "zap",
3 changes: 2 additions & 1 deletion cmd/zap/cmd/vector.go
@@ -24,6 +24,7 @@ import (

"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/go-faiss"
zap "github.com/blevesearch/zapx/v16"
"github.com/spf13/cobra"
)

@@ -63,7 +64,7 @@ var vectorCmd = &cobra.Command{
return fmt.Errorf("error while parsing the field data %v", err)
}
if fieldInv[len(fieldInv)-1] == args[1] {
vectorSectionOffset, ok := fieldSectionMap[uint16(sectionFaissVectorIndex)]
vectorSectionOffset, ok := fieldSectionMap[uint16(zap.SectionFaissVectorIndex)]
if !ok {
return fmt.Errorf("the specified field doesn't have a vector section in it.")
}
2 changes: 1 addition & 1 deletion docvalues.go
@@ -309,7 +309,7 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
continue
}
fieldID := fieldIDPlus1 - 1
if dvIter, exists := s.fieldDvReaders[sectionInvertedTextIndex][fieldID]; exists &&
if dvIter, exists := s.fieldDvReaders[SectionInvertedTextIndex][fieldID]; exists &&
dvIter != nil {
dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID])
}
2 changes: 1 addition & 1 deletion faiss_vector_posting.go
@@ -282,7 +282,7 @@ func (sb *SegmentBase) SimilarVectors(field string, qVector []float32, k int64,
vecDocIDMap := make(map[int64][]uint32)
fieldIDPlus1 := sb.fieldsMap[field]
if fieldIDPlus1 > 0 {
vectorSection := sb.fieldsSectionsMap[fieldIDPlus1-1][sectionFaissVectorIndex]
vectorSection := sb.fieldsSectionsMap[fieldIDPlus1-1][SectionFaissVectorIndex]
// check if the field has a vector section in the segment.
if vectorSection > 0 {
pos := int(vectorSection)
2 changes: 1 addition & 1 deletion faiss_vector_test.go
@@ -359,7 +359,7 @@ func TestVectorSegment(t *testing.T) {
}

fieldsSectionsMap := vecSegBase.fieldsSectionsMap
stubVecFieldStartAddr := fieldsSectionsMap[vecSegBase.fieldsMap["stubVec"]-1][sectionFaissVectorIndex]
stubVecFieldStartAddr := fieldsSectionsMap[vecSegBase.fieldsMap["stubVec"]-1][SectionFaissVectorIndex]
docValueStart, docValueEnd, indexBytesLen, _,
numVecs, _ := getSectionContentOffsets(vecSegBase, stubVecFieldStartAddr)

4 changes: 2 additions & 2 deletions section.go
@@ -56,8 +56,8 @@ type resetable interface {
// -----------------------------------------------------------------------------

const (
sectionInvertedTextIndex = iota
sectionFaissVectorIndex
SectionInvertedTextIndex = iota
SectionFaissVectorIndex
)

// -----------------------------------------------------------------------------
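Exporting these constants lets callers outside the package, such as the cmd/zap tools updated in this commit, key per-field section maps by section type. Below is a minimal sketch of that usage, assuming a locally built map of section type to file offset like the one constructed in cmd/zap/cmd/docvalue.go; the offsets are made-up values:

package main

import (
	"fmt"

	zap "github.com/blevesearch/zapx/v16"
)

// invertedIndexOffset returns the file offset recorded for a field's
// inverted text index section, if the field has one.
func invertedIndexOffset(fieldSectionMap map[uint16]uint64) (uint64, bool) {
	addr, ok := fieldSectionMap[uint16(zap.SectionInvertedTextIndex)]
	return addr, ok
}

func main() {
	sectionMap := map[uint16]uint64{
		uint16(zap.SectionInvertedTextIndex): 1024, // hypothetical offsets
		uint16(zap.SectionFaissVectorIndex):  4096,
	}
	if addr, ok := invertedIndexOffset(sectionMap); ok {
		fmt.Printf("inverted text index section at %d (%x)\n", addr, addr)
	}
}
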
10 changes: 5 additions & 5 deletions section_faiss_vector_index.go
@@ -29,7 +29,7 @@ import (
)

func init() {
registerSegmentSection(sectionFaissVectorIndex, &faissVectorIndexSection{})
registerSegmentSection(SectionFaissVectorIndex, &faissVectorIndexSection{})
}

type faissVectorIndexSection struct {
@@ -100,7 +100,7 @@ LOOP:
// check if the section address is a valid one for "fieldName" in the
// segment sb. the local fieldID (fetched by the fieldsMap of the sb)
// is to be used while consulting the fieldsSectionsMap
pos := int(sb.fieldsSectionsMap[sb.fieldsMap[fieldName]-1][sectionFaissVectorIndex])
pos := int(sb.fieldsSectionsMap[sb.fieldsMap[fieldName]-1][SectionFaissVectorIndex])
if pos == 0 {
continue LOOP
}
@@ -512,10 +512,10 @@ func (v *vectorIndexOpaque) allocateSpace() {
}

func (v *faissVectorIndexSection) getvectorIndexOpaque(opaque map[int]resetable) *vectorIndexOpaque {
if _, ok := opaque[sectionFaissVectorIndex]; !ok {
opaque[sectionFaissVectorIndex] = v.InitOpaque(nil)
if _, ok := opaque[SectionFaissVectorIndex]; !ok {
opaque[SectionFaissVectorIndex] = v.InitOpaque(nil)
}
return opaque[sectionFaissVectorIndex].(*vectorIndexOpaque)
return opaque[SectionFaissVectorIndex].(*vectorIndexOpaque)
}

func (v *faissVectorIndexSection) InitOpaque(args map[string]interface{}) resetable {
10 changes: 5 additions & 5 deletions section_inverted_text_index.go
@@ -27,7 +27,7 @@ import (
)

func init() {
registerSegmentSection(sectionInvertedTextIndex, &invertedTextIndexSection{})
registerSegmentSection(SectionInvertedTextIndex, &invertedTextIndexSection{})
}

type invertedTextIndexSection struct {
@@ -282,7 +282,7 @@ func mergeAndPersistInvertedSection(segments []*SegmentBase, dropsIn []*roaring.
}

fieldIDPlus1 := uint16(segment.fieldsMap[fieldName])
if dvIter, exists := segment.fieldDvReaders[sectionInvertedTextIndex][fieldIDPlus1-1]; exists &&
if dvIter, exists := segment.fieldDvReaders[SectionInvertedTextIndex][fieldIDPlus1-1]; exists &&
dvIter != nil {
fdvReadersAvailable = true
dvIterClone = dvIter.cloneInto(dvIterClone)
@@ -820,10 +820,10 @@ func (i *invertedIndexOpaque) allocateSpace() {
}

func (i *invertedTextIndexSection) getInvertedIndexOpaque(opaque map[int]resetable) *invertedIndexOpaque {
if _, ok := opaque[sectionInvertedTextIndex]; !ok {
opaque[sectionInvertedTextIndex] = i.InitOpaque(nil)
if _, ok := opaque[SectionInvertedTextIndex]; !ok {
opaque[SectionInvertedTextIndex] = i.InitOpaque(nil)
}
return opaque[sectionInvertedTextIndex].(*invertedIndexOpaque)
return opaque[SectionInvertedTextIndex].(*invertedIndexOpaque)
}

func (i *invertedIndexOpaque) getOrDefineField(fieldName string) int {
10 changes: 5 additions & 5 deletions segment.go
@@ -366,7 +366,7 @@ func (s *SegmentBase) loadFieldNew(fieldID uint16, addr uint64,
fieldSectionAddr := binary.BigEndian.Uint64(s.mem[pos : pos+8])
pos += 8
fieldSectionMap[fieldSectionType] = fieldSectionAddr
if fieldSectionType == sectionInvertedTextIndex {
if fieldSectionType == SectionInvertedTextIndex {
// for the fields which don't have the inverted index, the offset is
// 0 and during query time, because there is no valid dictionary we
// will just have follow a no-op path.
@@ -765,12 +765,12 @@ func (s *Segment) loadDvReadersLegacy() error {
if fieldDvReader != nil {
// older file formats have docValues corresponding only to inverted index
// ignore the rest.
if s.fieldDvReaders[sectionInvertedTextIndex] == nil {
s.fieldDvReaders[sectionInvertedTextIndex] = make(map[uint16]*docValueReader)
if s.fieldDvReaders[SectionInvertedTextIndex] == nil {
s.fieldDvReaders[SectionInvertedTextIndex] = make(map[uint16]*docValueReader)
}
// fix the structure of fieldDvReaders
// currently it populates the inverted index doc values
s.fieldDvReaders[sectionInvertedTextIndex][uint16(fieldID)] = fieldDvReader
s.fieldDvReaders[SectionInvertedTextIndex][uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, s.fieldsInv[fieldID])
}
}
@@ -832,7 +832,7 @@ func (s *SegmentBase) loadDvReaders() error {
return fmt.Errorf("loadDvReaders: failed to read the dataLoc "+
"offset for sectionID %v field %v", secID, s.fieldsInv[fieldID])
}
if secID == sectionInvertedTextIndex {
if secID == SectionInvertedTextIndex {
s.dictLocs = append(s.dictLocs, dataLoc)
}
fieldDvReader, err := s.loadFieldDocValueReader(s.fieldsInv[fieldID], fieldLocStart, fieldLocEnd)
