Skip to content

Commit

Permalink
refactoring code, better reuse of objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Sep 11, 2023
1 parent 037a94f commit bd6d08f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 70 deletions.
25 changes: 2 additions & 23 deletions new.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,6 @@ func (s *interim) reset() (err error) {
return err
}

// grabBuf returns a scratch byte slice of exactly size bytes. It reuses
// the interim's cached tmp0 buffer whenever its capacity is sufficient,
// and otherwise allocates a larger one and caches it for future calls.
// Callers must not retain the returned slice across calls.
func (s *interim) grabBuf(size int) []byte {
	if cap(s.tmp0) < size {
		// Existing scratch buffer is too small; grow and cache it.
		s.tmp0 = make([]byte, size)
	}
	return s.tmp0[:size]
}

type interimStoredField struct {
vals [][]byte
typs []byte
Expand Down Expand Up @@ -248,26 +239,14 @@ func (s *interim) getOrDefineField(fieldName string) int {
}

func (s *interim) processDocuments() {
numFields := len(s.FieldsInv)
reuseFieldLens := make([]int, numFields)
reuseFieldTFs := make([]index.TokenFrequencies, numFields)

for docNum, result := range s.results {
for i := 0; i < numFields; i++ { // clear these for reuse
reuseFieldLens[i] = 0
reuseFieldTFs[i] = nil
}

s.processDocument(uint64(docNum), result,
reuseFieldLens, reuseFieldTFs)
s.processDocument(uint64(docNum), result)
}
}

func (s *interim) processDocument(docNum uint64,
result index.Document,
fieldLens []int, fieldTFs []index.TokenFrequencies) {
result index.Document) {
visitField := func(field index.Field) {

fieldID := uint16(s.getOrDefineField(field.Name()))

// section specific processing of the field
Expand Down
28 changes: 15 additions & 13 deletions section_inverted_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ func (i *invertedIndexSection) Process(opaque map[int]resetable, docNum uint64,

func (i *invertedIndexSection) Persist(opaque map[int]resetable, w *CountHashWriter) (n int64, err error) {
invIndexOpaque := i.getInvertedIndexOpaque(opaque)
_, _ = invIndexOpaque.writeDicts(w)
return 0, nil
_, err = invIndexOpaque.writeDicts(w)
return 0, err
}

func (i *invertedIndexSection) AddrForField(opaque map[int]resetable, fieldID int) int {
Expand Down Expand Up @@ -353,7 +353,6 @@ func (i *invertedIndexSection) Merge(opaque map[int]resetable, segments []*Segme
return nil
}

// todo: is it possible to merge this resuable stuff with the interim's tmp0?
func (i *invertedIndexOpaque) grabBuf(size int) []byte {
buf := i.tmp0
if cap(buf) < size {
Expand Down Expand Up @@ -561,8 +560,6 @@ func (io *invertedIndexOpaque) writeDicts(w *CountHashWriter) (dictOffsets []uin

fieldStart := w.Count()

// todo: uvarint these offsets

n = binary.PutUvarint(buf, fdvOffsetsStart[fieldID])
_, err = w.Write(buf[:n])
if err != nil {
Expand Down Expand Up @@ -645,13 +642,6 @@ func (io *invertedIndexOpaque) process(field index.Field, fieldID uint16, docNum
return
}

if len(io.reusableFieldTFs) == 0 {
io.reusableFieldTFs = make([]index.TokenFrequencies, len(io.FieldsInv))
}
if len(io.reusableFieldLens) == 0 {
io.reusableFieldLens = make([]int, len(io.FieldsInv))
}

io.reusableFieldLens[fieldID] += field.AnalyzedLength()

existingFreqs := io.reusableFieldTFs[fieldID]
Expand Down Expand Up @@ -794,6 +784,18 @@ func (i *invertedIndexOpaque) prepareDicts() {
for _, dict := range i.DictKeys {
sort.Strings(dict)
}

if cap(i.reusableFieldTFs) >= len(i.FieldsInv) {
i.reusableFieldTFs = i.reusableFieldTFs[:len(i.FieldsInv)]
} else {
i.reusableFieldTFs = make([]index.TokenFrequencies, len(i.FieldsInv))
}

if cap(i.reusableFieldLens) >= len(i.FieldsInv) {
i.reusableFieldLens = i.reusableFieldLens[:len(i.FieldsInv)]
} else {
i.reusableFieldLens = make([]int, len(i.FieldsInv))
}
}

func (i *invertedIndexSection) getInvertedIndexOpaque(opaque map[int]resetable) *invertedIndexOpaque {
Expand All @@ -803,7 +805,6 @@ func (i *invertedIndexSection) getInvertedIndexOpaque(opaque map[int]resetable)
return opaque[sectionInvertedIndex].(*invertedIndexOpaque)
}

// revisit this function's purpose etc.
func (i *invertedIndexOpaque) getOrDefineField(fieldName string) int {
fieldIDPlus1, exists := i.FieldsMap[fieldName]
if !exists {
Expand Down Expand Up @@ -890,6 +891,7 @@ type invertedIndexOpaque struct {
func (io *invertedIndexOpaque) Reset() (err error) {
// cleanup stuff over here
io.results = nil
io.init = false
io.chunkMode = 0
io.FieldsMap = nil
io.FieldsInv = nil
Expand Down
35 changes: 1 addition & 34 deletions write.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,41 +101,8 @@ func persistNewFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64,
return rv, nil
}

// persistFields writes one record per field (uvarint dict location,
// uvarint name length, then the raw field name bytes), followed by a
// fixed-width index holding each record's start offset as a big-endian
// uint64. It returns the offset at which that index begins, or an error
// from the underlying writer.
func persistFields(fieldsInv []string, w *CountHashWriter, dictLocs []uint64) (uint64, error) {
	fieldStarts := make([]uint64, 0, len(fieldsInv))

	// First pass: emit the per-field records, remembering where each begins.
	for fieldID, fieldName := range fieldsInv {
		fieldStarts = append(fieldStarts, uint64(w.Count()))

		// dict location + field name length, both uvarint-encoded
		if _, err := writeUvarints(w, dictLocs[fieldID], uint64(len(fieldName))); err != nil {
			return 0, err
		}

		// the field name itself
		if _, err := w.Write([]byte(fieldName)); err != nil {
			return 0, err
		}
	}

	// Second pass: the fields index — one fixed-width offset per field,
	// starting at indexStart, which is what we hand back to the caller.
	indexStart := uint64(w.Count())
	for _, start := range fieldStarts {
		if err := binary.Write(w, binary.BigEndian, start); err != nil {
			return 0, err
		}
	}

	return indexStart, nil
}

// FooterSize is the size of the footer record in bytes
// crc + ver + chunk + field offset + stored offset + num docs + docValueOffset
// crc + ver + chunk + docValueOffset + sectionsIndexOffset + field offset + stored offset + num docs
const FooterSize = 4 + 4 + 4 + 8 + 8 + 8 + 8 + 8

func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, sectionsIndexOffset, docValueOffset uint64,
Expand Down

0 comments on commit bd6d08f

Please sign in to comment.