I have seen a question on this website about the speed of string concatenation. In that thread, people wrote about some ephemeral benchmarks with strange numbers. (The question: "How to efficiently concatenate strings in Go?")
I decided to check those speeds myself and wrote a test. My test shows different results: for large sizes, the "+" operator is faster than the other methods. Is that right?
Here is my code.
package main
import (
"bytes"
"fmt"
"runtime/debug"
"time"
)
// variations is the largest chunk-size exponent benchmarked; it also sizes
// the per-step result arrays, which main indexes with x-1.
const variations = 30

// Package-level state shared by main. Everything is global, so main declares
// almost no locals of its own.
var (
	// time1 and time2 bracket each timed section.
	time1, time2 time.Time

	// delta is the chunk appended on every operation; catcher receives the
	// concatenated result.
	delta, catcher string

	// x is the current step's exponent (chunk size is 2^x), deltaSize the
	// chunk size in bytes, dataSize the total bytes produced in the step and
	// operations the number of appends per method in the step.
	// k is declared but not referenced by any code shown here.
	x, deltaSize, k, dataSize, operations uint64

	// i and j are loop counters; x_min is the smallest exponent benchmarked.
	i, j, x_min uint64

	// l is the write offset into tmp for method #2A.
	l int

	// delta_byte is the scratch slice from which delta is built.
	delta_byte []byte

	// Measured speed per method, one slot per exponent.
	method1Speed, method2Speed, method3Speed, method3ASpeed, method2ASpeed [variations]uint64

	// dataTotal records the total bytes produced at each exponent.
	dataTotal [variations]uint64

	// tmp is the pre-sized destination buffer for method #2A.
	tmp []byte
)
// main benchmarks five ways of building one string out of `operations`
// copies of `delta` and then prints a table of the measured speeds.
//
// Steps run from exponent x = variations down to x_min in decrements of 2.
// The chunk size is 2^x bytes and the operation count doubles on every step,
// so the total amount of data halves from one step to the next.
//
// Methods:
//
//	#1  catcher += delta
//	#2  stringsJoinViaCopy into catcher (fresh slice on every call)
//	#3  stringsJoinViaBuffer into catcher (fresh bytes.Buffer on every call)
//	#3A one bytes.Buffer written to repeatedly, converted once at the end
//	#2A one pre-sized byte slice filled with copy, converted once at the end
func main() {
	x_min = 2
	operations = 1
	for x = variations; x >= x_min; x = x - 2 {
		deltaSize = 1 << x // 2^x
		dataSize = operations * deltaSize
		dataTotal[x-1] = dataSize
		fmt.Println("Step #", x, "delta=", deltaSize, "op.=", operations, "data=", dataSize)
		fmt.Println("Preparing Data...")
		delta_byte = make([]byte, deltaSize)
		for i = 0; i < deltaSize; i++ {
			delta_byte[i] = 255
		}
		delta = string(delta_byte)
		delta_byte = nil
		// Start every method from an empty result and release freed memory first.
		catcher = ""
		debug.FreeOSMemory()

		fmt.Println("Testing Method #1...")
		time1 = time.Now()
		for j = 1; j <= operations; j++ {
			//----------------------------
			catcher += delta
			//----------------------------
		}
		time2 = time.Now()
		method1Speed[x-1] = throughput(dataSize, time2.Sub(time1)) // KiB/sec.
		catcher = ""
		debug.FreeOSMemory()

		fmt.Println("Testing Method #2...")
		time1 = time.Now()
		for j = 1; j <= operations; j++ {
			//----------------------------
			stringsJoinViaCopy(&catcher, &catcher, &delta)
			//----------------------------
		}
		time2 = time.Now()
		method2Speed[x-1] = throughput(dataSize, time2.Sub(time1)) // KiB/sec.
		catcher = ""
		debug.FreeOSMemory()

		fmt.Println("Testing Method #3...")
		time1 = time.Now()
		for j = 1; j <= operations; j++ {
			//----------------------------
			stringsJoinViaBuffer(&catcher, &catcher, &delta)
			//----------------------------
		}
		time2 = time.Now()
		method3Speed[x-1] = throughput(dataSize, time2.Sub(time1)) // KiB/sec.
		catcher = ""
		debug.FreeOSMemory()

		fmt.Println("Testing Method #3A...")
		time1 = time.Now()
		buffer := bytes.NewBuffer(nil)
		for j = 1; j <= operations; j++ {
			//----------------------------
			buffer.WriteString(delta)
			//----------------------------
		}
		catcher = buffer.String()
		time2 = time.Now()
		method3ASpeed[x-1] = throughput(dataSize, time2.Sub(time1)) // KiB/sec.
		catcher = ""
		debug.FreeOSMemory()

		fmt.Println("Testing Method #2A...")
		time1 = time.Now()
		tmp = make([]byte, int(operations)*len(delta)) // Cheating (guessing) with size
		l = 0
		for j = 1; j <= operations; j++ {
			//----------------------------
			l += copy(tmp[l:], delta)
			//----------------------------
		}
		catcher = string(tmp)
		time2 = time.Now()
		method2ASpeed[x-1] = throughput(dataSize, time2.Sub(time1)) // KiB/sec.
		catcher = ""
		delta = ""
		debug.FreeOSMemory()
		///
		operations *= 2
	}

	// Show Results
	fmt.Println("#. ops. Total Data, B. Speed (KiB/sec) M1 M2 M3 M3A M2A")
	for x = x_min; x <= variations; x = x + 2 {
		// The exponent drops by 2 per step while the operation count only
		// doubles, so the step with exponent x ran 2^((variations-x)/2)
		// operations.
		operations = 1 << ((variations - x) / 2)
		fmt.Println(x, operations, dataTotal[x-1], method1Speed[x-1], method2Speed[x-1], method3Speed[x-1],
			method3ASpeed[x-1], method2ASpeed[x-1])
	}
}

// throughput converts size bytes processed in elapsed time into whole KiB
// (1024 bytes) per second. It returns 0 when elapsed is not positive, because
// dividing by zero would yield +Inf and converting that to uint64 is
// implementation-dependent.
func throughput(size uint64, elapsed time.Duration) uint64 {
	if elapsed <= 0 {
		return 0
	}
	return uint64(float64(size) / 1024 / elapsed.Seconds())
}
//------------------------------------------------------------------------------
// stringsJoinViaBuffer writes *a followed by *b into a fresh bytes.Buffer and
// stores the buffer's contents in *dest. Both inputs are read before *dest is
// assigned, so dest may point at the same string as a or b.
func stringsJoinViaBuffer(dest, a, b *string) {
	var joined bytes.Buffer
	for _, part := range []*string{a, b} {
		joined.WriteString(*part)
	}
	*dest = joined.String()
}
//------------------------------------------------------------------------------
// stringsJoinViaCopy allocates a byte slice sized for both inputs, copies *a
// and then *b into it, and stores the resulting string in *dest. Both inputs
// are read before *dest is assigned, so dest may point at the same string as
// a or b.
func stringsJoinViaCopy(dest, a, b *string) {
	first, second := *a, *b
	joined := make([]byte, len(first)+len(second))
	n := copy(joined, first)
	copy(joined[n:], second)
	*dest = string(joined)
}
Here are the results:
#. ops. Total Data, B. Speed (KiB/sec) M1 M2 M3 M3A M2A
2 268435456 65536 236 109 57 108413 301653
4 67108864 131072 464 227 113 251519 576660
6 16777216 262144 895 410 202 225300 626165
8 4194304 524288 1514 672 351 205068 552088
10 1048576 1048576 3187 1412 756 207588 532239
12 262144 2097152 7980 3238 1727 209447 592230
14 65536 4194304 16361 6553 3641 230521 536320
16 16384 8388608 29568 12170 6835 241752 604050
18 4096 16777216 55158 23950 13549 238039 563997
20 1024 33554432 98348 43400 25958 216947 521189
22 256 67108864 168906 80442 48725 231806 534722
24 64 134217728 299127 129035 89686 254403 519534
26 16 268435456 529730 207405 153894 284578 506730
28 4 536870912 1167316 353510 268546 359990 523471
30 1 1073741824 909950698305 503703 581848 572763 579852
It seems like it only works well when you either have a lot of data coming in constantly or can cheat by guessing the size... Is that correct? If the strings are only occasional, is a simple "+" better? Somehow, in the mentioned question, people measured byte transfers without real-world tasks.
Somehow, at step #26 the "+" operator is even faster than cheating with size guessing!
For very large sizes, the plus operator ("+") is faster than the other methods.
One thing you're doing wrong is how you're benchmarking the buffer version. You're allocating a new buffer on every iteration; instead, you should create the buffer once and keep writing to it until you're done, and only then retrieve your result. Otherwise, why use a buffer at all?
// Create one buffer, seeded with whatever catcher already holds, and reuse it
// for every append instead of allocating a new buffer per iteration.
buf := bytes.NewBuffer([]byte(catcher))
for j = 1; j <= operations; j++ {
//----------------------------
buf.WriteString(delta)
//----------------------------
}
// Convert to a string only once, after all writes are done.
catcher = buf.String()
Your `stringsJoinViaCopy` also unnecessarily allocates a new byte slice every single time. And `copy` anyway makes sense over `bytes.Buffer` only when you know the size of the string beforehand, since `Buffer` already uses `copy` underneath, together with some growing heuristic for the underlying byte slice.
Here's a Go benchmark starter kit.
`concat_test.go`:
package main
import (
"bytes"
"strconv"
"strings"
"testing"
)
// BenchmarkConcat compares three ways of joining two equal-length strings:
// the + operator, a pre-sized byte slice filled with copy, and a
// bytes.Buffer. Operand lengths are 1, 8, 64, 512 and 4096 bytes.
//
// Each sub-benchmark is named <Method>L<length> and reports allocations.
// Every result is stored in one variable shared by all sub-benchmarks,
// presumably so the joins are not optimized away.
func BenchmarkConcat(b *testing.B) {
	var sink string
	for size := 1; size <= 1<<12; size <<= 3 {
		left := strings.Repeat("a", size)
		right := strings.Repeat("b", size)
		label := "L" + strconv.Itoa(size)

		// Plain concatenation with the + operator.
		b.Run("Plus"+label, func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				sink = left + right
			}
			b.StopTimer()
		})

		// Exact-size byte slice filled with copy, then converted to string.
		b.Run("Copy"+label, func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				joined := make([]byte, len(left)+len(right))
				n := copy(joined, left)
				copy(joined[n:], right)
				sink = string(joined)
			}
			b.StopTimer()
		})

		// Zero-value bytes.Buffer created fresh for every join.
		b.Run("Buffer"+label, func(b *testing.B) {
			b.ReportAllocs()
			b.ResetTimer()
			for iter := 0; iter < b.N; iter++ {
				var buf bytes.Buffer
				buf.WriteString(left)
				buf.WriteString(right)
				sink = buf.String()
			}
			b.StopTimer()
		})
	}
	_ = sink
}
Output:
$ go test -bench=.
goos: linux
goarch: amd64
pkg: so/concat
BenchmarkConcat/PlusL1-4 30000000 55.9 ns/op 2 B/op 1 allocs/op
BenchmarkConcat/CopyL1-4 30000000 63.0 ns/op 4 B/op 2 allocs/op
BenchmarkConcat/BufferL1-4 10000000 115 ns/op 114 B/op 2 allocs/op
BenchmarkConcat/PlusL8-4 20000000 78.1 ns/op 16 B/op 1 allocs/op
BenchmarkConcat/CopyL8-4 20000000 99.2 ns/op 32 B/op 2 allocs/op
BenchmarkConcat/BufferL8-4 10000000 131 ns/op 128 B/op 2 allocs/op
BenchmarkConcat/PlusL64-4 20000000 85.3 ns/op 128 B/op 1 allocs/op
BenchmarkConcat/CopyL64-4 10000000 125 ns/op 256 B/op 2 allocs/op
BenchmarkConcat/BufferL64-4 5000000 328 ns/op 432 B/op 3 allocs/op
BenchmarkConcat/PlusL512-4 5000000 249 ns/op 1024 B/op 1 allocs/op
BenchmarkConcat/CopyL512-4 3000000 457 ns/op 2048 B/op 2 allocs/op
BenchmarkConcat/BufferL512-4 1000000 1012 ns/op 3184 B/op 4 allocs/op
BenchmarkConcat/PlusL4096-4 1000000 1527 ns/op 8192 B/op 1 allocs/op
BenchmarkConcat/CopyL4096-4 500000 3132 ns/op 16384 B/op 2 allocs/op
BenchmarkConcat/BufferL4096-4 300000 4863 ns/op 24688 B/op 4 allocs/op
PASS
ok so/concat 24.308s
$