From dc650677068bcc3a5823d592d55207eeafbbd914 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 20:30:57 -0500 Subject: [PATCH 01/17] working pooling --- builder.go | 37 ++++++++++++++++++++++++++++++++++--- builder_test.go | 11 ++++++----- registry.go | 14 ++++++++++++++ 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/builder.go b/builder.go index f793329..21d30d2 100644 --- a/builder.go +++ b/builder.go @@ -50,7 +50,8 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) { if opts == nil { opts = defaultBuilderOpts } - builderNodePool := &builderNodePool{} + + builderNodePool := newBuilderNodePool(100000) rv := &Builder{ unfinished: newUnfinishedNodes(builderNodePool), registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize), @@ -83,6 +84,7 @@ func (b *Builder) Reset(w io.Writer) error { if err != nil { return err } + return nil } @@ -136,30 +138,40 @@ func (b *Builder) Close() error { func (b *Builder) compileFrom(iState int) error { addr := noneAddr + // var lastNode *builderNode for iState+1 < len(b.unfinished.stack) { + // b.builderNodePool.Put(lastNode) var node *builderNode if addr == noneAddr { node = b.unfinished.popEmpty() + // returnToPool = true } else { node = b.unfinished.popFreeze(addr) + // returnToPool = true } var err error addr, err = b.compile(node) if err != nil { return nil } + // lastNode = node } b.unfinished.topLastFreeze(addr) + // b.builderNodePool.Put(lastNode) return nil } +var finalCount = 0 + func (b *Builder) compile(node *builderNode) (int, error) { if node.final && len(node.trans) == 0 && node.finalOutput == 0 { + b.builderNodePool.Put(node) return 0, nil } found, addr, entry := b.registry.entry(node) if found { + b.builderNodePool.Put(node) return addr, nil } addr, err := b.encoder.encodeState(node, b.lastAddr) @@ -258,7 +270,10 @@ func (u *unfinishedNodes) pushEmpty(final bool) { u.stack = append(u.stack, next) } +var popRootCount = 0 + func (u *unfinishedNodes) popRoot() *builderNode { + popRootCount++ l := len(u.stack) var unfinished *builderNodeUnfinished u.stack, unfinished = u.stack[:l-1], u.stack[l-1] @@ -430,12 +445,27 @@ func outputCat(l, r uint64) uint64 { // +-new char--------| builderNode Pool |<-----------evicted // +-------------------+ type builderNodePool struct { - head *builderNode + gets int + puts int + allocs int + head *builderNode +} + +func newBuilderNodePool(size int) *builderNodePool { + pool := &builderNodePool{} + for i := 0; i < size; i++ { + pool.Put(&builderNode{trans: make([]transition, 0, 10)}) + } + return pool } func (p *builderNodePool) Get() *builderNode { + p.gets++ if p.head == nil { - return &builderNode{} + p.allocs++ + return &builderNode{ + trans: make([]transition, 0, 10), + } } head := p.head p.head = p.head.next @@ -446,6 +476,7 @@ func (p *builderNodePool) Put(v *builderNode) { if v == nil { return } + p.puts++ v.reset() v.next = p.head p.head = v diff --git a/builder_test.go b/builder_test.go index ec8ed6a..1a60f78 100644 --- a/builder_test.go +++ b/builder_test.go @@ -66,6 +66,7 @@ func TestBuilderSimple(t *testing.T) { if err != nil { t.Errorf("got error closing set builder: %v", err) } + b.Reset(ioutil.Discard) } func TestBuilderSharedPrefix(t *testing.T) { @@ -238,12 +239,12 @@ func BenchmarkBuilder(b *testing.B) { b.ResetTimer() + builder, err := New(ioutil.Discard, nil) + if err != nil { + b.Fatalf("error creating builder: %v", err) + } for i := 0; i < b.N; i++ { - - builder, err := New(ioutil.Discard, nil) - if err != nil { - b.Fatalf("error creating builder: %v", err) - } + builder.Reset(ioutil.Discard) err = insertStrings(builder, dataset, randomThousandVals) if err != nil { b.Fatalf("error inserting thousand words: %v", err) diff --git a/registry.go b/registry.go index f5b9b4d..1582241 100644 --- a/registry.go +++ b/registry.go @@ -37,16 +37,25 @@ func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry { return rv } +var notEmptyCount = 0 + func (r *registry) Reset() { var empty registryCell for i := range r.table { + if r.table[i].node != nil { + notEmptyCount++ + } r.builderNodePool.Put(r.table[i].node) r.table[i] = empty } } +var entryCount = 0 + func (r *registry) entry(node *builderNode) (bool, int, *registryCell) { + entryCount++ if len(r.table) == 0 { + panic("bad") return false, 0, nil } bucket := r.hash(node) @@ -77,6 +86,9 @@ func (r *registry) hash(b *builderNode) int { type registryCache []registryCell +var matchCount = 0 +var insertCount = 0 + func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) { if len(r) == 1 { if r[0].node != nil && r[0].node.equiv(node) { @@ -90,9 +102,11 @@ func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, in if r[i].node != nil && r[i].node.equiv(node) { addr := r[i].addr r.promote(i) + matchCount++ return true, addr, nil } } + // no match last := len(r) - 1 pool.Put(r[last].node) From 02d88b994c7e2a6bb38eab64d748a913473b6653 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 21:00:18 -0500 Subject: [PATCH 02/17] improve pooling more --- builder.go | 45 ++++++++++++++++++++++++++++++++------------- builder_test.go | 3 +++ registry.go | 1 - vellum.go | 7 ++++--- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/builder.go b/builder.go index 21d30d2..73427e3 100644 --- a/builder.go +++ b/builder.go @@ -23,6 +23,10 @@ var defaultBuilderOpts = &BuilderOpts{ Encoder: 1, RegistryTableSize: 10000, RegistryMRUSize: 2, + BuilderNodePoolingConfig: BuilderNodePoolingConfig{ + MaxSize: 10000, + MaxTransitionSize: 100, + }, } // A Builder is used to build a new FST. When possible data is @@ -51,7 +55,7 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) { opts = defaultBuilderOpts } - builderNodePool := newBuilderNodePool(100000) + builderNodePool := newBuilderNodePool(opts.BuilderNodePoolingConfig) rv := &Builder{ unfinished: newUnfinishedNodes(builderNodePool), registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize), @@ -431,6 +435,18 @@ func outputCat(l, r uint64) uint64 { return l + r } +// BuilderNodePoolingConfig is the configuration struct for the BuilderNodePool. +// Note that unsafe.SizeOf(transition{}) is 24 bytes and unsafe.SizeOf(BuilderNode{}) +// is 48 bytes so the amount of memory used by the pool should be approximately +// MaxSize * (48 + 24 * MaxTransitionSize) not including the extra space required +// by the G.C. +type BuilderNodePoolingConfig struct { + // Maximum number of builder nodes can be retained in the pool. + MaxSize int + // Maximum size of the transitions array for an individual builder node. + MaxTransitionSize int +} + // builderNodePool pools builderNodes using a singly linked list. // // NB: builderNode lifecylce is described by the following interactions - @@ -445,39 +461,42 @@ func outputCat(l, r uint64) uint64 { // +-new char--------| builderNode Pool |<-----------evicted // +-------------------+ type builderNodePool struct { - gets int - puts int - allocs int + config BuilderNodePoolingConfig + size int head *builderNode } -func newBuilderNodePool(size int) *builderNodePool { - pool := &builderNodePool{} - for i := 0; i < size; i++ { - pool.Put(&builderNode{trans: make([]transition, 0, 10)}) +func newBuilderNodePool(config BuilderNodePoolingConfig) *builderNodePool { + // Pool will lazy alloc. + return &builderNodePool{ + config: config, } - return pool } func (p *builderNodePool) Get() *builderNode { - p.gets++ if p.head == nil { - p.allocs++ return &builderNode{ trans: make([]transition, 0, 10), } } head := p.head p.head = p.head.next + p.size-- return head } +var counter = 0 + func (p *builderNodePool) Put(v *builderNode) { - if v == nil { + if v == nil || + p.size >= p.config.MaxSize || + cap(v.trans) > p.config.MaxTransitionSize { + // Don't store nil or allow the pool to violate its config. return } - p.puts++ + v.reset() v.next = p.head p.head = v + p.size++ } diff --git a/builder_test.go b/builder_test.go index 1a60f78..f0930c3 100644 --- a/builder_test.go +++ b/builder_test.go @@ -234,6 +234,9 @@ func loadWords(path string) ([]string, error) { var thousandTestWords []string func BenchmarkBuilder(b *testing.B) { + //fmt.Println(unsafe.Sizeof(builderNode{})) + // fmt.Println(unsafe.Sizeof(transition{})) + // panic("hmm") dataset := thousandTestWords randomThousandVals := randomValues(dataset) diff --git a/registry.go b/registry.go index 1582241..426a626 100644 --- a/registry.go +++ b/registry.go @@ -55,7 +55,6 @@ var entryCount = 0 func (r *registry) entry(node *builderNode) (bool, int, *registryCell) { entryCount++ if len(r.table) == 0 { - panic("bad") return false, 0, nil } bucket := r.hash(node) diff --git a/vellum.go b/vellum.go index b2537b3..f3f8f42 100644 --- a/vellum.go +++ b/vellum.go @@ -54,9 +54,10 @@ var ErrIteratorDone = errors.New("iterator-done") // BuilderOpts is a structure to let advanced users customize the behavior // of the builder and some aspects of the generated FST. type BuilderOpts struct { - Encoder int - RegistryTableSize int - RegistryMRUSize int + Encoder int + RegistryTableSize int + RegistryMRUSize int + BuilderNodePoolingConfig BuilderNodePoolingConfig } // New returns a new Builder which will stream out the From 4c5fe799c291479f075d7f63de549fda7b4592cc Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 21:02:52 -0500 Subject: [PATCH 03/17] remove unused code --- builder.go | 11 ----------- builder_test.go | 4 ---- registry.go | 12 ------------ 3 files changed, 27 deletions(-) diff --git a/builder.go b/builder.go index 73427e3..d5a1615 100644 --- a/builder.go +++ b/builder.go @@ -142,31 +142,23 @@ func (b *Builder) Close() error { func (b *Builder) compileFrom(iState int) error { addr := noneAddr - // var lastNode *builderNode for iState+1 < len(b.unfinished.stack) { - // b.builderNodePool.Put(lastNode) var node *builderNode if addr == noneAddr { node = b.unfinished.popEmpty() - // returnToPool = true } else { node = b.unfinished.popFreeze(addr) - // returnToPool = true } var err error addr, err = b.compile(node) if err != nil { return nil } - // lastNode = node } b.unfinished.topLastFreeze(addr) - // b.builderNodePool.Put(lastNode) return nil } -var finalCount = 0 - func (b *Builder) compile(node *builderNode) (int, error) { if node.final && len(node.trans) == 0 && node.finalOutput == 0 { @@ -274,10 +266,7 @@ func (u *unfinishedNodes) pushEmpty(final bool) { u.stack = append(u.stack, next) } -var popRootCount = 0 - func (u *unfinishedNodes) popRoot() *builderNode { - popRootCount++ l := len(u.stack) var unfinished *builderNodeUnfinished u.stack, unfinished = u.stack[:l-1], u.stack[l-1] diff --git a/builder_test.go b/builder_test.go index f0930c3..8eef85e 100644 --- a/builder_test.go +++ b/builder_test.go @@ -66,7 +66,6 @@ func TestBuilderSimple(t *testing.T) { if err != nil { t.Errorf("got error closing set builder: %v", err) } - b.Reset(ioutil.Discard) } func TestBuilderSharedPrefix(t *testing.T) { @@ -234,9 +233,6 @@ func loadWords(path string) ([]string, error) { var thousandTestWords []string func BenchmarkBuilder(b *testing.B) { - //fmt.Println(unsafe.Sizeof(builderNode{})) - // fmt.Println(unsafe.Sizeof(transition{})) - // panic("hmm") dataset := thousandTestWords randomThousandVals := randomValues(dataset) diff --git a/registry.go b/registry.go index 426a626..c83c82a 100644 --- a/registry.go +++ b/registry.go @@ -37,23 +37,15 @@ func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry { return rv } -var notEmptyCount = 0 - func (r *registry) Reset() { var empty registryCell for i := range r.table { - if r.table[i].node != nil { - notEmptyCount++ - } r.builderNodePool.Put(r.table[i].node) r.table[i] = empty } } -var entryCount = 0 - func (r *registry) entry(node *builderNode) (bool, int, *registryCell) { - entryCount++ if len(r.table) == 0 { return false, 0, nil } @@ -85,9 +77,6 @@ func (r *registry) hash(b *builderNode) int { type registryCache []registryCell -var matchCount = 0 -var insertCount = 0 - func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) { if len(r) == 1 { if r[0].node != nil && r[0].node.equiv(node) { @@ -101,7 +90,6 @@ func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, in if r[i].node != nil && r[i].node.equiv(node) { addr := r[i].addr r.promote(i) - matchCount++ return true, addr, nil } } From 44650082f121a88e77b58f3a3de27100cf6f52f4 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 21:11:23 -0500 Subject: [PATCH 04/17] Add comments --- builder.go | 8 ++++++++ registry.go | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/builder.go b/builder.go index d5a1615..62e0bc9 100644 --- a/builder.go +++ b/builder.go @@ -162,14 +162,22 @@ func (b *Builder) compileFrom(iState int) error { func (b *Builder) compile(node *builderNode) (int, error) { if node.final && len(node.trans) == 0 && node.finalOutput == 0 { + // We're done with this node so its safe to put it back in the pool. b.builderNodePool.Put(node) return 0, nil } found, addr, entry := b.registry.entry(node) if found { + // This node already existed in the registry (and thus the registry + // did not assume ownership of it) so its safe to put it back in + // the pool. b.builderNodePool.Put(node) return addr, nil } + // If the node was not found in the registry, then the registry will + // have assumed ownership of it and is responsible for returning it + // to the pool. + addr, err := b.encoder.encodeState(node, b.lastAddr) if err != nil { return 0, err diff --git a/registry.go b/registry.go index c83c82a..a7f8e27 100644 --- a/registry.go +++ b/registry.go @@ -77,6 +77,10 @@ func (r *registry) hash(b *builderNode) int { type registryCache []registryCell +// The registry is responsible for returning BuilderNodes that it controls to the BuilderNodePool once +// they are evicted. As a result, all the codepaths in the entry method that return false (entry was not +// found and the registry is assuming ownership of this node) will return the corresponding evicted to to +// the builderNodePool. func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) { if len(r) == 1 { if r[0].node != nil && r[0].node.equiv(node) { From 64fb9e815d2e32098adbf0269548b3cc937c30c6 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 21:30:09 -0500 Subject: [PATCH 05/17] Document registry --- builder.go | 19 ++++++++----------- registry.go | 7 +++++++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/builder.go b/builder.go index 62e0bc9..8077bbd 100644 --- a/builder.go +++ b/builder.go @@ -446,17 +446,14 @@ type BuilderNodePoolingConfig struct { // builderNodePool pools builderNodes using a singly linked list. // -// NB: builderNode lifecylce is described by the following interactions - -// +------------------------+ +----------------------+ -// | Unfinished Nodes | Transfer once | Registry | -// |(not frozen builderNode)|-----builderNode is ------->| (frozen builderNode) | -// +------------------------+ marked frozen +----------------------+ -// ^ | -// | | -// | Put() -// | Get() on +-------------------+ when -// +-new char--------| builderNode Pool |<-----------evicted -// +-------------------+ +// The lifecycle is as follows: +// +// 1. Builder retrieves a node from the pool using Get() whenever it needs one. +// 2. After a node is compiled it is either: +// a. Discarded and immediately returned to the pool. +// b. Transferred to the registry (which assumes ownership of it) and will +// return it to the pool when it evicts the node to make room for another, +// or when the entire registry is Reset(). type builderNodePool struct { config BuilderNodePoolingConfig size int diff --git a/registry.go b/registry.go index a7f8e27..2134f0a 100644 --- a/registry.go +++ b/registry.go @@ -19,6 +19,13 @@ type registryCell struct { node *builderNode } +// Registry is used as a form of LRU so that the number of nodes that need to be kept +// in memory is reduced. When the builder is compiling the FST and is presented with +// compiling a given node, it can check the registry to see if an equivalent node has +// already been compiled. If so, the registry will return the address of the already +// compiled node and the builder can use that. If an equivalent node has not already +// been compiled (or was, but has since been evicted from the LRU), the builder will +// recompile it into the encoder and then add it to the registry for future use. type registry struct { builderNodePool *builderNodePool table []registryCell From ff600d9832cf3d8a8451737a34d5cf0d4cb97c97 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Thu, 3 Jan 2019 21:39:43 -0500 Subject: [PATCH 06/17] Add builder reusable test --- vellum_test.go | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vellum_test.go b/vellum_test.go index 652d9b6..c9fd95b 100644 --- a/vellum_test.go +++ b/vellum_test.go @@ -149,9 +149,26 @@ func TestRoundTripSimple(t *testing.T) { } func TestRoundTripThousand(t *testing.T) { - dataset := thousandTestWords - randomThousandVals := randomValues(dataset) + b, err := New(nil, nil) + if err != nil { + t.Fatalf("error creating builder: %v", err) + } + + testRoundTripThousand(t, b) +} + +func TestRoundTripThousandBuilderIsReusable(t *testing.T) { + b, err := New(nil, nil) + if err != nil { + t.Fatalf("error creating builder: %v", err) + } + for i := 0; i < 1000; i++ { + testRoundTripThousand(t, b) + } +} + +func testRoundTripThousand(t *testing.T, b *Builder) { f, err := ioutil.TempFile("", "vellum") if err != nil { t.Fatal(err) @@ -167,10 +184,10 @@ func TestRoundTripThousand(t *testing.T) { } }() - b, err := New(f, nil) - if err != nil { - t.Fatalf("error creating builder: %v", err) - } + b.Reset(f) + + dataset := thousandTestWords + randomThousandVals := randomValues(dataset) err = insertStrings(b, dataset, randomThousandVals) if err != nil { From 1be2a57d143b96323473e37a23abbe7882f8edb3 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Fri, 4 Jan 2019 09:33:43 -0500 Subject: [PATCH 07/17] remove global var --- builder.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/builder.go b/builder.go index 8077bbd..39bbef6 100644 --- a/builder.go +++ b/builder.go @@ -479,8 +479,6 @@ func (p *builderNodePool) Get() *builderNode { return head } -var counter = 0 - func (p *builderNodePool) Put(v *builderNode) { if v == nil || p.size >= p.config.MaxSize || From 3bcc7a358c1b756b10534167430d569b47e5e3b0 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Fri, 4 Jan 2019 09:37:14 -0500 Subject: [PATCH 08/17] fix comment typo --- registry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/registry.go b/registry.go index 2134f0a..f023d97 100644 --- a/registry.go +++ b/registry.go @@ -86,7 +86,7 @@ type registryCache []registryCell // The registry is responsible for returning BuilderNodes that it controls to the BuilderNodePool once // they are evicted. As a result, all the codepaths in the entry method that return false (entry was not -// found and the registry is assuming ownership of this node) will return the corresponding evicted to to +// found and the registry is assuming ownership of this node) will return the corresponding evicted node to // the builderNodePool. func (r registryCache) entry(node *builderNode, pool *builderNodePool) (bool, int, *registryCell) { if len(r) == 1 { From 472accfaeb22b9157cc93b58786ec7569d4d686b Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Fri, 4 Jan 2019 15:24:24 -0500 Subject: [PATCH 09/17] more reasonable defaults --- builder.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/builder.go b/builder.go index 39bbef6..d1e8f4c 100644 --- a/builder.go +++ b/builder.go @@ -21,11 +21,11 @@ import ( var defaultBuilderOpts = &BuilderOpts{ Encoder: 1, - RegistryTableSize: 10000, + RegistryTableSize: 1000, RegistryMRUSize: 2, BuilderNodePoolingConfig: BuilderNodePoolingConfig{ MaxSize: 10000, - MaxTransitionSize: 100, + MaxTransitionSize: 32, }, } @@ -469,9 +469,7 @@ func newBuilderNodePool(config BuilderNodePoolingConfig) *builderNodePool { func (p *builderNodePool) Get() *builderNode { if p.head == nil { - return &builderNode{ - trans: make([]transition, 0, 10), - } + return &builderNode{} } head := p.head p.head = p.head.next From 2a809548503f6fb05f8efd540c303ff399cb3897 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 14:44:47 -0500 Subject: [PATCH 10/17] Add comment --- builder.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/builder.go b/builder.go index d1e8f4c..656ca39 100644 --- a/builder.go +++ b/builder.go @@ -24,6 +24,13 @@ var defaultBuilderOpts = &BuilderOpts{ RegistryTableSize: 1000, RegistryMRUSize: 2, BuilderNodePoolingConfig: BuilderNodePoolingConfig{ + // This value should always be significantly larger than + // RegistryTableSize * RegistryMRUSize because that defines + // how many items can be stored in the registry, and if the + // pools MaxSize is not much larger than that, then all the + // BuilderNodes will end up stuck in the registry and not + // returned to the pool which will cause the building process + // to begin allocating a lot. MaxSize: 10000, MaxTransitionSize: 32, }, From 68fca1f76012ebc2e1322e3c4809d5cabf3eab86 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 14:49:42 -0500 Subject: [PATCH 11/17] Change defaults --- builder.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builder.go b/builder.go index 656ca39..beb0992 100644 --- a/builder.go +++ b/builder.go @@ -21,7 +21,7 @@ import ( var defaultBuilderOpts = &BuilderOpts{ Encoder: 1, - RegistryTableSize: 1000, + RegistryTableSize: 10000, RegistryMRUSize: 2, BuilderNodePoolingConfig: BuilderNodePoolingConfig{ // This value should always be significantly larger than @@ -31,7 +31,7 @@ var defaultBuilderOpts = &BuilderOpts{ // BuilderNodes will end up stuck in the registry and not // returned to the pool which will cause the building process // to begin allocating a lot. - MaxSize: 10000, + MaxSize: 100000, MaxTransitionSize: 32, }, } From 8bc3063dd1f486748b41275fc5565c4fd25043d1 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 14:51:30 -0500 Subject: [PATCH 12/17] better comment --- builder.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builder.go b/builder.go index beb0992..29f217f 100644 --- a/builder.go +++ b/builder.go @@ -441,9 +441,10 @@ func outputCat(l, r uint64) uint64 { // BuilderNodePoolingConfig is the configuration struct for the BuilderNodePool. // Note that unsafe.SizeOf(transition{}) is 24 bytes and unsafe.SizeOf(BuilderNode{}) -// is 48 bytes so the amount of memory used by the pool should be approximately +// is 48 bytes so the maximum amount of memory used by the pool should be approximately // MaxSize * (48 + 24 * MaxTransitionSize) not including the extra space required -// by the G.C. +// by the G.C. Note if an F.S.T construction never requires this many BuilderNodes then +// the maximum size of the pool will never be reached as it is allocated lazily. type BuilderNodePoolingConfig struct { // Maximum number of builder nodes can be retained in the pool. MaxSize int From 4ee95eebd851ff6a6dd9cf9ea7d510ad1b30db8e Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 14:55:51 -0500 Subject: [PATCH 13/17] pass errcheck lint --- builder_test.go | 7 ++++++- vellum_test.go | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/builder_test.go b/builder_test.go index 8eef85e..8d7e9b2 100644 --- a/builder_test.go +++ b/builder_test.go @@ -243,7 +243,12 @@ func BenchmarkBuilder(b *testing.B) { b.Fatalf("error creating builder: %v", err) } for i := 0; i < b.N; i++ { - builder.Reset(ioutil.Discard) + err = builder.Reset(ioutil.Discard) + if err != nil { + if err != nil { + b.Fatalf("error reseting builder: %v", err) + } + } err = insertStrings(builder, dataset, randomThousandVals) if err != nil { b.Fatalf("error inserting thousand words: %v", err) diff --git a/vellum_test.go b/vellum_test.go index c9fd95b..20e00f9 100644 --- a/vellum_test.go +++ b/vellum_test.go @@ -184,7 +184,10 @@ func testRoundTripThousand(t *testing.T, b *Builder) { } }() - b.Reset(f) + err = b.Reset(f) + if err != nil { + t.Fatalf("error resetting builder: %v", err) + } dataset := thousandTestWords randomThousandVals := randomValues(dataset) From 346219c06c3266bce1df162ead1f040744ca90f7 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 15:04:02 -0500 Subject: [PATCH 14/17] Registry cell optimization --- registry.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/registry.go b/registry.go index f023d97..896dca6 100644 --- a/registry.go +++ b/registry.go @@ -47,7 +47,12 @@ func newRegistry(p *builderNodePool, tableSize, mruSize int) *registry { func (r *registry) Reset() { var empty registryCell for i := range r.table { - r.builderNodePool.Put(r.table[i].node) + if r.table[i].node != nil { + // Only try and return to the pool if the node actually exists to + // avoid excessive function call overhead in the scenario where many + // of the cells are empty. + r.builderNodePool.Put(r.table[i].node) + } r.table[i] = empty } } From cf09c9cc2bb6a17ae370e1523c6eb0825734fb58 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Sat, 5 Jan 2019 15:21:56 -0500 Subject: [PATCH 15/17] make stack size configurable --- builder.go | 20 +++++++++++++------- vellum.go | 1 + 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/builder.go b/builder.go index 29f217f..993246f 100644 --- a/builder.go +++ b/builder.go @@ -20,9 +20,10 @@ import ( ) var defaultBuilderOpts = &BuilderOpts{ - Encoder: 1, - RegistryTableSize: 10000, - RegistryMRUSize: 2, + Encoder: 1, + RegistryTableSize: 10000, + RegistryMRUSize: 2, + UnfinishedNodesStackSize: 64, BuilderNodePoolingConfig: BuilderNodePoolingConfig{ // This value should always be significantly larger than // RegistryTableSize * RegistryMRUSize because that defines @@ -64,7 +65,7 @@ func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) { builderNodePool := newBuilderNodePool(opts.BuilderNodePoolingConfig) rv := &Builder{ - unfinished: newUnfinishedNodes(builderNodePool), + unfinished: newUnfinishedNodes(builderNodePool, opts), registry: newRegistry(builderNodePool, opts.RegistryTableSize, opts.RegistryMRUSize), builderNodePool: builderNodePool, opts: opts, @@ -217,10 +218,15 @@ func (u *unfinishedNodes) Reset() { u.pushEmpty(false) } -func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes { +func newUnfinishedNodes(p *builderNodePool, opts *BuilderOpts) *unfinishedNodes { + initialSize := opts.UnfinishedNodesStackSize + if initialSize <= 0 { + initialSize = defaultBuilderOpts.UnfinishedNodesStackSize + } + rv := &unfinishedNodes{ - stack: make([]*builderNodeUnfinished, 0, 64), - cache: make([]builderNodeUnfinished, 64), + stack: make([]*builderNodeUnfinished, 0, initialSize), + cache: make([]builderNodeUnfinished, initialSize), builderNodePool: p, } rv.pushEmpty(false) diff --git a/vellum.go b/vellum.go index f3f8f42..320f607 100644 --- a/vellum.go +++ b/vellum.go @@ -57,6 +57,7 @@ type BuilderOpts struct { Encoder int RegistryTableSize int RegistryMRUSize int + UnfinishedNodesStackSize int BuilderNodePoolingConfig BuilderNodePoolingConfig } From 72caa5dbe888702cd89a5e599070143ec9aa20cb Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 12 Feb 2021 16:01:35 -0500 Subject: [PATCH 16/17] revert benchmark change --- builder_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/builder_test.go b/builder_test.go index 8d7e9b2..31d25e4 100644 --- a/builder_test.go +++ b/builder_test.go @@ -238,16 +238,10 @@ func BenchmarkBuilder(b *testing.B) { b.ResetTimer() - builder, err := New(ioutil.Discard, nil) - if err != nil { - b.Fatalf("error creating builder: %v", err) - } for i := 0; i < b.N; i++ { - err = builder.Reset(ioutil.Discard) + builder, err := New(ioutil.Discard, nil) if err != nil { - if err != nil { b.Fatalf("error reseting builder: %v", err) - } } err = insertStrings(builder, dataset, randomThousandVals) if err != nil { From d4d45d7c0ac95ebf0c41a11e991cc83627946bcf Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 12 Feb 2021 16:03:34 -0500 Subject: [PATCH 17/17] revert it properly --- builder_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builder_test.go b/builder_test.go index 31d25e4..ec8ed6a 100644 --- a/builder_test.go +++ b/builder_test.go @@ -239,9 +239,10 @@ func BenchmarkBuilder(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { + builder, err := New(ioutil.Discard, nil) if err != nil { - b.Fatalf("error reseting builder: %v", err) + b.Fatalf("error creating builder: %v", err) } err = insertStrings(builder, dataset, randomThousandVals) if err != nil {