aboutsummaryrefslogtreecommitdiff
path: root/src/sync
diff options
context:
space:
mode:
authorRuslan Andreev <ruslan.andreev@huawei.com>2021-03-10 19:31:59 +0800
committerEmmanuel Odeke <emmanuel@orijtech.com>2021-04-26 17:13:36 +0000
commitd5d24dbe419c429b43046049d57b97b0abd42a87 (patch)
treee114398b9a8297b9c6ea576335fdbe9c7bf0231d /src/sync
parent1f7ddf57d2908319c0ca7dc621a206935d8726f2 (diff)
downloadgo-d5d24dbe419c429b43046049d57b97b0abd42a87.tar.gz
go-d5d24dbe419c429b43046049d57b97b0abd42a87.zip
sync: improve sync.Pool object stealing
This CL provide abilty to randomly select P to steal object from its shared queue. In order to provide such ability randomOrder structure was copied from runtime/proc.go. It should reduce contention in firsts Ps and improve balance of object stealing across all Ps. Also, the patch provides new benchmark PoolStarvation which force Ps to steal objects. Benchmarks: name old time/op new time/op delta Pool-8 2.16ns ±14% 2.14ns ±16% ~ (p=0.425 n=10+10) PoolOverflow-8 489ns ± 0% 489ns ± 0% ~ (p=0.719 n=9+10) PoolStarvation-8 7.00µs ± 4% 6.59µs ± 2% -5.86% (p=0.000 n=10+10) PoolSTW-8 15.1µs ± 1% 15.2µs ± 1% +0.99% (p=0.001 n=10+10) PoolExpensiveNew-8 1.25ms ±10% 1.31ms ± 9% ~ (p=0.143 n=10+10) [Geo mean] 2.68µs 2.68µs -0.28% name old p50-ns/STW new p50-ns/STW delta PoolSTW-8 15.0k ± 1% 15.1k ± 1% +0.92% (p=0.000 n=10+10) name old p95-ns/STW new p95-ns/STW delta PoolSTW-8 16.2k ± 3% 16.4k ± 2% ~ (p=0.143 n=10+10) name old GCs/op new GCs/op delta PoolExpensiveNew-8 0.29 ± 2% 0.30 ± 1% +2.84% (p=0.000 n=8+10) name old New/op new New/op delta PoolExpensiveNew-8 8.07 ±11% 8.49 ±10% ~ (p=0.123 n=10+10) Change-Id: I3ca1d0bf1f358b1148c58e64740fb2d5bfc0bc02 Reviewed-on: https://go-review.googlesource.com/c/go/+/303949 Reviewed-by: David Chase <drchase@google.com> Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Diffstat (limited to 'src/sync')
-rw-r--r--src/sync/pool.go103
-rw-r--r--src/sync/pool_test.go18
2 files changed, 111 insertions, 10 deletions
diff --git a/src/sync/pool.go b/src/sync/pool.go
index 1ae70127ac..3fb4dc07de 100644
--- a/src/sync/pool.go
+++ b/src/sync/pool.go
@@ -70,6 +70,57 @@ type poolLocal struct {
pad [128 - unsafe.Sizeof(poolLocalInternal{})%128]byte
}
+// The randomOrder and randomEnum are copied from runtime/proc.go
+type randomOrder struct {
+ count uint32
+ coprimes []uint32
+}
+
+type randomEnum struct {
+ i uint32
+ count uint32
+ pos uint32
+ inc uint32
+}
+
+func (ord *randomOrder) reset(count uint32) {
+ ord.count = count
+ ord.coprimes = ord.coprimes[:0]
+ for i := uint32(1); i <= count; i++ {
+ if gcd(i, count) == 1 {
+ ord.coprimes = append(ord.coprimes, i)
+ }
+ }
+}
+
+func (ord *randomOrder) start(i uint32) randomEnum {
+ return randomEnum{
+ count: ord.count,
+ pos: i % ord.count,
+ inc: ord.coprimes[i%uint32(len(ord.coprimes))],
+ }
+}
+
+func (enum *randomEnum) done() bool {
+ return enum.i == enum.count
+}
+
+func (enum *randomEnum) next() {
+ enum.i++
+ enum.pos = (enum.pos + enum.inc) % enum.count
+}
+
+func (enum *randomEnum) position() uint32 {
+ return enum.pos
+}
+
+func gcd(a, b uint32) uint32 {
+ for b != 0 {
+ a, b = b, a%b
+ }
+ return a
+}
+
// from runtime
func fastrand() uint32
@@ -153,12 +204,27 @@ func (p *Pool) Get() interface{} {
func (p *Pool) getSlow(pid int) interface{} {
// See the comment in pin regarding ordering of the loads.
size := runtime_LoadAcquintptr(&p.localSize) // load-acquire
- locals := p.local // load-consume
- // Try to steal one element from other procs.
- for i := 0; i < int(size); i++ {
- l := indexLocal(locals, (pid+i+1)%int(size))
- if x, _ := l.shared.popTail(); x != nil {
- return x
+ // Load pOrder atomically to prevent possible races
+ order := (*randomOrder)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)))) // load-consume
+
+ // Pin function always returns non-zero localSize, and it will remain so until runtime_procUnpin
+ // is called. This invariant is maintained by pin ensuring that locals is always big enough to
+ // account for the current P and that poolCleanup can never execute concurrently with a pinned P
+ // due to disabled preemtion.
+ // So, we can remove this condition which protects from division by zero in loop's body,
+ // but we leave it here just to be sure there is no any possibility for error
+ if size != 0 {
+ locals := p.local // load-consume
+ // Try to steal one element from other procs.
+ for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+ i := int(rndp.position())
+ // While pOrder is limited to returning indexes within the range of Ps,
+ // locals may be smaller either because it was reset or because of a race
+ // with pinSlow. Hence, we must still mod the local index by size.
+ l := indexLocal(locals, (pid+i+1)%int(size))
+ if x, _ := l.shared.popTail(); x != nil {
+ return x
+ }
}
}
@@ -166,16 +232,25 @@ func (p *Pool) getSlow(pid int) interface{} {
// from all primary caches because we want objects in the
// victim cache to age out if at all possible.
size = atomic.LoadUintptr(&p.victimSize)
+
+ // We also have to ensure that victim cache is big enough to account current P
+ // and size is not equal to zero (protects from division by zero) similar as pin
+ // function do
if uintptr(pid) >= size {
return nil
}
- locals = p.victim
+ locals := p.victim
l := indexLocal(locals, pid)
+
+ // Check private cache
if x := l.private; x != nil {
l.private = nil
return x
}
- for i := 0; i < int(size); i++ {
+
+ // Try to fetch from the tail of other P queues
+ for rndp := order.start(fastrand()); !rndp.done(); rndp.next() {
+ i := int(rndp.position())
l := indexLocal(locals, (pid+i)%int(size))
if x, _ := l.shared.popTail(); x != nil {
return x
@@ -224,9 +299,13 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
}
// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
size := runtime.GOMAXPROCS(0)
+ // Set count of Ps for random ordering
+ order := &randomOrder{}
+ order.reset(uint32(size))
local := make([]poolLocal, size)
- atomic.StorePointer(&p.local, unsafe.Pointer(&local[0])) // store-release
- runtime_StoreReluintptr(&p.localSize, uintptr(size)) // store-release
+ atomic.StorePointer(&p.local, unsafe.Pointer(&local[0])) // store-release
+ atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&pOrder)), unsafe.Pointer(order)) // store-release
+ runtime_StoreReluintptr(&p.localSize, uintptr(size)) // store-release
return &local[pid], pid
}
@@ -267,6 +346,10 @@ var (
// oldPools is the set of pools that may have non-empty victim
// caches. Protected by STW.
oldPools []*Pool
+
+ // pOrder is a random order of Ps used for stealing. Writes
+ // are protected by allPoolsMu. Reads are atomic.
+ pOrder *randomOrder
)
func init() {
diff --git a/src/sync/pool_test.go b/src/sync/pool_test.go
index 65666daab4..6cccd8a533 100644
--- a/src/sync/pool_test.go
+++ b/src/sync/pool_test.go
@@ -271,6 +271,24 @@ func BenchmarkPoolOverflow(b *testing.B) {
})
}
+// Simulate object starvation in order to force Ps to steal objects
+// from other Ps.
+func BenchmarkPoolStarvation(b *testing.B) {
+ var p Pool
+ count := 100
+ count_starved := count - (count / runtime.GOMAXPROCS(0))
+ b.RunParallel(func(pb *testing.PB) {
+ for pb.Next() {
+ for b := 0; b < count_starved; b++ {
+ p.Put(1)
+ }
+ for b := 0; b < count; b++ {
+ p.Get()
+ }
+ }
+ })
+}
+
var globalSink interface{}
func BenchmarkPoolSTW(b *testing.B) {