diff options
author | Russ Cox <rsc@golang.org> | 2022-03-14 12:31:33 -0400 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2022-04-05 18:01:26 +0000 |
commit | 9e16cc1541d42cb081d359339e3f45b4b9b2a372 (patch) | |
tree | 469eeb85ad0f949b3bc46d91a807f675b9042b37 /src/hash | |
parent | 9839668b5619f45e293dd40339bf0ac614ea6bee (diff) | |
download | go-9e16cc1541d42cb081d359339e3f45b4b9b2a372.tar.gz go-9e16cc1541d42cb081d359339e3f45b4b9b2a372.zip |
hash/maphash: add Bytes and String
For very small inputs, h.Reset+h.Write+h.Sum64 is fundamentally
slower than a single operation, by about a factor of two, because
Write must copy the data into h's buffer, just in case there is another
Write before the Sum64.
A single function doing the whole sequence knows there is no extra
write that will happen, so it doesn't need the buffer, so it avoids the copy.
Fixes #42710.
Change-Id: Icc79c68ccb10827f6640071d026df86b4940fcc1
Reviewed-on: https://go-review.googlesource.com/c/go/+/392494
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'src/hash')
-rw-r--r-- | src/hash/maphash/maphash.go | 48 | ||||
-rw-r--r-- | src/hash/maphash/maphash_test.go | 60 |
2 files changed, 88 insertions(+), 20 deletions(-)
diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go index d022d746a7..973fb68701 100644 --- a/src/hash/maphash/maphash.go +++ b/src/hash/maphash/maphash.go @@ -33,6 +33,54 @@ type Seed struct { s uint64 } +// Bytes returns the hash of b with the given seed. +// +// Bytes is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.Write(b) +// return h.Sum() +func Bytes(seed Seed, b []byte) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + if len(b) == 0 { + return rthash(nil, 0, state) // avoid &b[0] index panic below + } + if len(b) > bufSize { + b = b[:len(b):len(b)] // merge len and cap calculations when reslicing + for len(b) > bufSize { + state = rthash(&b[0], bufSize, state) + b = b[bufSize:] + } + } + return rthash(&b[0], len(b), state) +} + +// String returns the hash of s with the given seed. +// +// String is equivalent to, but more convenient and efficient than: +// +// var h Hash +// h.SetSeed(seed) +// h.WriteString(s) +// return h.Sum() +func String(seed Seed, s string) uint64 { + state := seed.s + if state == 0 { + panic("maphash: use of uninitialized Seed") + } + for len(s) > bufSize { + p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) + state = rthash(p, bufSize, state) + s = s[bufSize:] + } + p := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data) + return rthash(p, len(s), state) +} + // A Hash computes a seeded hash of a byte sequence. // // The zero Hash is a valid Hash ready to use. 
diff --git a/src/hash/maphash/maphash_test.go b/src/hash/maphash/maphash_test.go index 78cdfc0e73..7526989073 100644 --- a/src/hash/maphash/maphash_test.go +++ b/src/hash/maphash/maphash_test.go @@ -6,6 +6,7 @@ package maphash import ( "bytes" + "fmt" "hash" "testing" ) @@ -87,6 +88,14 @@ func TestHashGrouping(t *testing.T) { t.Errorf("hash %d not identical to a single Write", i) } } + + if sum1 := Bytes(hh[0].Seed(), b); sum1 != hh[0].Sum64() { + t.Errorf("hash using Bytes not identical to a single Write") + } + + if sum1 := String(hh[0].Seed(), string(b)); sum1 != hh[0].Sum64() { + t.Errorf("hash using String not identical to a single Write") + } } func TestHashBytesVsString(t *testing.T) { @@ -208,28 +217,39 @@ var _ hash.Hash64 = &Hash{} func benchmarkSize(b *testing.B, size int) { h := &Hash{} buf := make([]byte, size) - b.SetBytes(int64(size)) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - h.Reset() - h.Write(buf) - h.Sum64() - } -} - -func BenchmarkHash8Bytes(b *testing.B) { - benchmarkSize(b, 8) -} + s := string(buf) + + b.Run("Write", func(b *testing.B) { + b.SetBytes(int64(size)) + for i := 0; i < b.N; i++ { + h.Reset() + h.Write(buf) + h.Sum64() + } + }) -func BenchmarkHash320Bytes(b *testing.B) { - benchmarkSize(b, 320) -} + b.Run("Bytes", func(b *testing.B) { + b.SetBytes(int64(size)) + seed := h.Seed() + for i := 0; i < b.N; i++ { + Bytes(seed, buf) + } + }) -func BenchmarkHash1K(b *testing.B) { - benchmarkSize(b, 1024) + b.Run("String", func(b *testing.B) { + b.SetBytes(int64(size)) + seed := h.Seed() + for i := 0; i < b.N; i++ { + String(seed, s) + } + }) } -func BenchmarkHash8K(b *testing.B) { - benchmarkSize(b, 8192) +func BenchmarkHash(b *testing.B) { + sizes := []int{4, 8, 16, 32, 64, 256, 320, 1024, 4096, 16384} + for _, size := range sizes { + b.Run(fmt.Sprint("n=", size), func(b *testing.B) { + benchmarkSize(b, size) + }) + } } |