author    Paul E. Murphy <murp@ibm.com>  2020-09-09 17:24:23 -0500
committer Lynn Boger <laboger@linux.vnet.ibm.com>  2020-10-21 14:34:44 +0000
commit    15ead857dbc638b9d83a7686acf0dc746fc45918
tree      db6466a49022bd1c47f01f3d557bb85d63b45d7c
parent    54c0237346adfc2cac7bbebba80d652227ab6ea5
cmd/compile,cmd/go,sync: add internal {LoadAcq,StoreRel}64 on ppc64
Add an internal atomic intrinsic for load with acquire semantics
(extending LoadAcq to 64b) and add LoadAcquintptr for internal use
within the sync package. For other arches, this remaps to the
appropriate atomic.Load{,64} intrinsic, which should not alter code
generation.

Similarly, add StoreRel{uintptr,64} for consistency, and inline.

Finally, add an exception to allow sync to directly use the
runtime/internal/atomic package, which avoids more convoluted
workarounds (contributed by Lynn Boger).

In an extreme example, sync.(*Pool).pin consumes 20% of wall time
during fmt tests. This is reduced to 5% on ppc64le/power9.

From the fmt benchmarks on ppc64le:

name                           old time/op  new time/op  delta
SprintfPadding                  468ns ± 0%   451ns ± 0%   -3.63%
SprintfEmpty                   73.3ns ± 0%  51.9ns ± 0%  -29.20%
SprintfString                   135ns ± 0%   122ns ± 0%   -9.63%
SprintfTruncateString           232ns ± 0%   214ns ± 0%   -7.76%
SprintfTruncateBytes            216ns ± 0%   202ns ± 0%   -6.48%
SprintfSlowParsingPath          162ns ± 0%   142ns ± 0%  -12.35%
SprintfQuoteString             1.00µs ± 0%  0.99µs ± 0%   -1.39%
SprintfInt                      117ns ± 0%   104ns ± 0%  -11.11%
SprintfIntInt                   190ns ± 0%   175ns ± 0%   -7.89%
SprintfPrefixedInt              232ns ± 0%   212ns ± 0%   -8.62%
SprintfFloat                    270ns ± 0%   255ns ± 0%   -5.56%
SprintfComplex                 1.01µs ± 0%  0.99µs ± 0%   -1.68%
SprintfBoolean                  127ns ± 0%   111ns ± 0%  -12.60%
SprintfHexString                220ns ± 0%   198ns ± 0%  -10.00%
SprintfHexBytes                 261ns ± 0%   252ns ± 0%   -3.45%
SprintfBytes                    600ns ± 0%   590ns ± 0%   -1.67%
SprintfStringer                 684ns ± 0%   658ns ± 0%   -3.80%
SprintfStructure               2.57µs ± 0%  2.57µs ± 0%   -0.12%
ManyArgs                        669ns ± 0%   646ns ± 0%   -3.44%
FprintInt                       140ns ± 0%   136ns ± 0%   -2.86%
FprintfBytes                    184ns ± 0%   181ns ± 0%   -1.63%
FprintIntNoAlloc                140ns ± 0%   136ns ± 0%   -2.86%
ScanInts                        929µs ± 0%   921µs ± 0%   -0.79%
ScanRecursiveInt                122ms ± 0%   121ms ± 0%   -0.11%
ScanRecursiveIntReaderWrapper   122ms ± 0%   122ms ± 0%   -0.18%

Change-Id: I4d66780261b57b06ef600229e475462e7313f0d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/253748
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Keith Randall <khr@golang.org>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
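To make the ordering concrete, the publication pattern that pin/pinSlow
rely on can be sketched against the public sync/atomic API, whose
operations are at least as strong as the acquire/release intrinsics
this CL adds. Everything below (table, tableSize, publish, lookup) is a
hypothetical stand-in for p.local/p.localSize, not code from this CL:

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type entry struct{ v int }

var (
	table     unsafe.Pointer // points at the first entry; written before tableSize
	tableSize uintptr        // published last, read first
)

// publish stores the backing-array pointer, then the size. The size
// store plays the role of pinSlow's StoreReluintptr (store-release):
// any reader that sees the new size also sees the pointer stored
// before it.
func publish(entries []entry) {
	atomic.StorePointer(&table, unsafe.Pointer(&entries[0])) // store-release
	atomic.StoreUintptr(&tableSize, uintptr(len(entries)))   // store-release
}

// lookup mirrors pin: load-acquire the size first, then read the
// pointer (the "load-consume"), then index into the array the same
// way indexLocal does.
func lookup(i uintptr) *entry {
	n := atomic.LoadUintptr(&tableSize) // load-acquire
	p := atomic.LoadPointer(&table)     // load-consume
	if p == nil || i >= n {
		return nil
	}
	return (*entry)(unsafe.Pointer(uintptr(p) + i*unsafe.Sizeof(entry{})))
}

func main() {
	publish([]entry{{1}, {2}, {3}})
	fmt.Println(lookup(1).v) // prints 2
}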
Diffstat (limited to 'src/sync')
-rw-r--r--  src/sync/pool.go | 15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/sync/pool.go b/src/sync/pool.go
index ca7afdb12f..137413fdc4 100644
--- a/src/sync/pool.go
+++ b/src/sync/pool.go
@@ -7,6 +7,7 @@ package sync
 import (
 	"internal/race"
 	"runtime"
+	runtimeatomic "runtime/internal/atomic"
 	"sync/atomic"
 	"unsafe"
 )
@@ -152,8 +153,8 @@ func (p *Pool) Get() interface{} {
 func (p *Pool) getSlow(pid int) interface{} {
 	// See the comment in pin regarding ordering of the loads.
-	size := atomic.LoadUintptr(&p.localSize) // load-acquire
-	locals := p.local                        // load-consume
+	size := runtimeatomic.LoadAcquintptr(&p.localSize) // load-acquire
+	locals := p.local                                  // load-consume
 	// Try to steal one element from other procs.
 	for i := 0; i < int(size); i++ {
 		l := indexLocal(locals, (pid+i+1)%int(size))
@@ -165,7 +166,7 @@ func (p *Pool) getSlow(pid int) interface{} {
 	// Try the victim cache. We do this after attempting to steal
 	// from all primary caches because we want objects in the
 	// victim cache to age out if at all possible.
-	size = atomic.LoadUintptr(&p.victimSize)
+	size = runtimeatomic.Loaduintptr(&p.victimSize)
 	if uintptr(pid) >= size {
 		return nil
 	}
@@ -198,8 +199,8 @@ func (p *Pool) pin() (*poolLocal, int) {
 	// Since we've disabled preemption, GC cannot happen in between.
 	// Thus here we must observe local at least as large localSize.
 	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
-	s := atomic.LoadUintptr(&p.localSize) // load-acquire
-	l := p.local                          // load-consume
+	s := runtimeatomic.LoadAcquintptr(&p.localSize) // load-acquire
+	l := p.local                                    // load-consume
 	if uintptr(pid) < s {
 		return indexLocal(l, pid), pid
 	}
@@ -225,8 +226,8 @@ func (p *Pool) pinSlow() (*poolLocal, int) {
 	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
 	size := runtime.GOMAXPROCS(0)
 	local := make([]poolLocal, size)
-	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0])) // store-release
-	atomic.StoreUintptr(&p.localSize, uintptr(size))         // store-release
+	atomic.StorePointer(&p.local, unsafe.Pointer(&local[0]))   // store-release
+	runtimeatomic.StoreReluintptr(&p.localSize, uintptr(size)) // store-release
 	return &local[pid], pid
 }
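For readers wondering what "remaps to the appropriate atomic.Load{,64}
intrinsic" amounts to on other arches, a minimal sketch of the generic
fallback shape is below. It uses the public sync/atomic API and a
hypothetical package name; it is the shape of the mapping, not the
runtime's actual definitions:

// Package atomicsketch is a hypothetical stand-in for
// runtime/internal/atomic; it only illustrates the fallback shape.
package atomicsketch

import "sync/atomic"

// LoadAcquintptr: where no dedicated acquire instruction is needed, an
// ordinary atomic load (at least acquire-strength) suffices, so the
// generic version simply delegates.
func LoadAcquintptr(ptr *uintptr) uintptr {
	return atomic.LoadUintptr(ptr)
}

// StoreReluintptr: likewise, an ordinary atomic store is at least
// release-strength, so no separate instruction sequence is required.
func StoreReluintptr(ptr *uintptr, val uintptr) {
	atomic.StoreUintptr(ptr, val)
}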