-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgoroutine_test.go
218 lines (187 loc) · 5.34 KB
/
goroutine_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
package benchmarks
import (
"fmt"
"strconv"
"sync"
"sync/atomic"
"testing"
)
// BenchmarkGoroutine measures how long a fixed amount of additions
// (400,000,000 per iteration, integer-divided between the workers) takes when
// it is spread over an increasing number of goroutines. One sub-benchmark is
// run per goroutine count and is named after that count.
func BenchmarkGoroutine(b *testing.B) {
	for _, numGoRoutines := range []int{1, 2, 4, 8, 16, 32, 100, 1000, 10000, 100000} {
		// Per-iteration copy for the closures below (needed before Go 1.22).
		numGoRoutines := numGoRoutines
		b.Run(strconv.Itoa(numGoRoutines), func(b *testing.B) {
			b.ReportAllocs()
			for k := 0; k < b.N; k++ {
				wg := &sync.WaitGroup{}
				wg.Add(numGoRoutines)
				// Each goroutine gets an equal share of the total work.
				target := 400000000 / numGoRoutines
				for i := 0; i < numGoRoutines; i++ {
					go func() {
						total := 0
						for j := 0; j < target; j++ {
							total += j
						}
						// Inspect the sum so the loop has an observable result
						// (presumably to stop the compiler discarding it).
						if total == 0 {
							b.Logf("really?")
						}
						// Signal completion last: once Wait returns the
						// sub-benchmark may finish, and b must not be used by
						// a goroutine after that point.
						wg.Done()
					}()
				}
				wg.Wait()
			}
		})
	}
}
// BenchmarkGoroutineChannel measures the cost of handing work to a pool of
// goroutines over a channel, for several pool sizes and request-channel
// buffer sizes. The pool is started before the sub-benchmark runs, so only
// dispatching a batch of requests and collecting the results is timed.
func BenchmarkGoroutineChannel(b *testing.B) {
	// Number of requests sent, and results collected, per benchmark iteration.
	const batch = 1000

	for _, numGoRoutines := range []int{1, 2, 8, 32, 1000, 10000, 100000} {
		for _, buf := range []int{0, 1, 100, 1000, 10000} {
			requests := make(chan struct{}, buf)
			// Sized to hold a whole batch of results.
			results := make(chan int, batch)
			// Increments performed for every request.
			work := 10000

			// Start the worker pool up front; each worker serves requests
			// until the request channel is closed.
			var pool sync.WaitGroup
			worker := func() {
				defer pool.Done()
				for range requests {
					sum := 0
					for n := 0; n < work; n++ {
						sum++
					}
					results <- sum
				}
			}
			pool.Add(numGoRoutines)
			for started := 0; started < numGoRoutines; started++ {
				go worker()
			}

			name := fmt.Sprintf("goroutines=%d,buf=%d", numGoRoutines, buf)
			b.Run(name, func(b *testing.B) {
				for iter := 0; iter < b.N; iter++ {
					// Queue a full batch of requests...
					for sent := 0; sent < batch; sent++ {
						requests <- struct{}{}
					}
					// ...then gather one result per request.
					sum := 0
					for got := 0; got < batch; got++ {
						sum += <-results
					}
					if sum != batch*work {
						b.Logf("total not as expected. %d", sum)
					}
				}
			})

			// Shut the pool down before moving on to the next configuration.
			close(requests)
			pool.Wait()
		}
	}
}
// BenchmarkGoroutineChannelWork measures pushing 100000 items through a
// buffered channel to a small set of goroutines, varying both the number of
// goroutines and the amount of work done per item. The goroutines are started
// with the timer stopped; sending the items, closing the channel and waiting
// for the workers to finish is what gets timed.
func BenchmarkGoroutineChannelWork(b *testing.B) {
	for _, numGoRoutines := range []int{1, 2, 4, 8} {
		// Per-iteration copies for the closures below (needed before Go 1.22).
		numGoRoutines := numGoRoutines
		for _, work := range []int{100, 1000, 10000, 100000} {
			work := work
			b.Run(fmt.Sprintf("goroutines=%d_work=%d", numGoRoutines, work), func(b *testing.B) {
				for k := 0; k < b.N; k++ {
					// Counts workers that never received an item.
					var numZeroWork int64
					// Stop the timer while we're starting our goroutines
					b.StopTimer()
					ch := make(chan int, 1000)
					wg := &sync.WaitGroup{}
					wg.Add(numGoRoutines)
					for i := 0; i < numGoRoutines; i++ {
						go func() {
							// Deferred so that the counter update below has
							// happened before Wait returns; calling Done
							// earlier lets the read after Wait race with it.
							defer wg.Done()
							total := 0
							for j := range ch {
								for l := 0; l < work; l++ {
									total += j
								}
							}
							if total == 0 {
								atomic.AddInt64(&numZeroWork, 1)
							}
						}()
					}
					b.StartTimer()
					for i := 0; i < 100000; i++ {
						ch <- 1
					}
					close(ch)
					wg.Wait()
					// Only report on the first iteration to keep the output short.
					if idle := atomic.LoadInt64(&numZeroWork); idle > 0 && k == 0 {
						b.Logf("%d goroutines did zero work", idle)
					}
				}
			})
		}
	}
}
// BenchmarkJustWork is the baseline: it performs `work` increments inline on
// the benchmark's own goroutine, with one sub-benchmark per work size.
func BenchmarkJustWork(b *testing.B) {
	sizes := []int{100, 1000, 10000, 100000}
	for _, work := range sizes {
		name := fmt.Sprintf("work=%d", work)
		b.Run(name, func(b *testing.B) {
			b.ReportAllocs()
			for iter := 0; iter < b.N; iter++ {
				sum := 0
				for step := 0; step < work; step++ {
					sum++
				}
				// Look at the result so the loop has an observable outcome.
				if sum == 0 {
					b.Logf("oops")
				}
			}
		})
	}
}
/*
We benchmark some work, just adding up. The results are as follows. I used `go test -run ^$ -bench BenchmarkJustWork -count 10` and fed the results into benchstat.
name time/op
JustWork/work=100-8 42.3ns ± 2%
JustWork/work=1000-8 316ns ± 2%
JustWork/work=10000-8 3.05µs ± 2%
JustWork/work=100000-8 31.1µs ± 3%
Now, to calculate the overhead of moving work to a goroutine, we repeat the benchmark, but do our adding up on a goroutine. Here's the result when we restrict it to one CPU.
name time/op
WorkOnGoroutine/work=100 447ns ± 3%
WorkOnGoroutine/work=1000 800ns ± 2%
WorkOnGoroutine/work=10000 3.49µs ± 1%
WorkOnGoroutine/work=100000 31.2µs ± 2%
Calculating the overhead should just be a case of subtracting one set of numbers from the other.
100 447 - 42.3 = 405ns
1000 800 - 316 = 484ns
10000 3.49 - 3.05 = 0.44µs
100000 31.2 - 31.1 = 0.1µs
So this is all a bit rough, but it looks like the overhead of this particular way of moving work to a goroutine is about 400ns. The 100,000 case does not fit this (it comes out at only about 100ns), but I'm going to put this down to rounding effects.
What happens if we allow the benchmark to run on all available CPUs? Here are the numbers.
name time/op
WorkOnGoroutine/work=100-8 559ns ± 2%
WorkOnGoroutine/work=1000-8 985ns ± 1%
WorkOnGoroutine/work=10000-8 4.64µs ± 2%
WorkOnGoroutine/work=100000-8 33.9µs ± 0%
If we work out the overhead again things look much worse.
100 559 - 42.3 = 516.7ns
1000 985 - 316 = 669ns
10000 4.64 - 3.05 = 1.59µs
100000 33.9 - 31.1 = 2.8µs
Not only is the overhead higher, but it gets worse when more work is done on the goroutine. This makes very little sense, as on the face of it the amount of overhead should be independent of the work.
*/
// BenchmarkWorkOnGoroutine performs the same increments as the inline
// baseline, but on a freshly started goroutine that hands its result back
// over a new unbuffered channel. One sub-benchmark is run per work size.
func BenchmarkWorkOnGoroutine(b *testing.B) {
	sizes := []int{100, 1000, 10000, 100000}
	for _, work := range sizes {
		name := fmt.Sprintf("work=%d", work)
		b.Run(name, func(b *testing.B) {
			b.ReportAllocs()
			for iter := 0; iter < b.N; iter++ {
				// A new channel and goroutine per iteration are part of what
				// is being measured.
				reply := make(chan int)
				go func(out chan<- int, steps int) {
					sum := 0
					for step := 0; step < steps; step++ {
						sum++
					}
					out <- sum
					close(out)
				}(reply, work)
				// Block until the goroutine reports its result.
				if got := <-reply; got == 0 {
					b.Logf("oops")
				}
			}
		})
	}
}