diff options
author | Carlo Alberto Ferraris <cafxx@strayorange.com> | 2018-11-10 08:28:44 +0900 |
---|---|---|
committer | Brad Fitzpatrick <bradfitz@golang.org> | 2019-03-05 14:59:31 +0000 |
commit | 4c3f26076b6a9853bcc3c7d7e43726c044ac028a (patch) | |
tree | d154c2aed2f92cfb8203c49485d08267243f6378 /src/sync | |
parent | 94cbfc2f7f6b375426381c81d8efe78b12e058c3 (diff) | |
download | go-4c3f26076b6a9853bcc3c7d7e43726c044ac028a.tar.gz go-4c3f26076b6a9853bcc3c7d7e43726c044ac028a.zip |
sync: allow inlining the Mutex.Unlock fast path
Make use of the newly-enabled limited midstack inlining.
Similar changes will be done in followup CLs.
name old time/op new time/op delta
MutexUncontended 19.3ns ± 1% 18.9ns ± 0% -1.92% (p=0.000 n=20+19)
MutexUncontended-4 5.24ns ± 0% 4.75ns ± 1% -9.25% (p=0.000 n=20+20)
MutexUncontended-16 2.10ns ± 0% 2.05ns ± 0% -2.38% (p=0.000 n=15+19)
Mutex 19.6ns ± 0% 19.3ns ± 1% -1.92% (p=0.000 n=20+17)
Mutex-4 54.6ns ± 5% 52.4ns ± 4% -4.09% (p=0.000 n=20+20)
Mutex-16 133ns ± 5% 139ns ± 2% +4.23% (p=0.000 n=20+16)
MutexSlack 33.4ns ± 2% 18.9ns ± 1% -43.56% (p=0.000 n=19+20)
MutexSlack-4 206ns ± 5% 225ns ± 8% +9.12% (p=0.000 n=20+18)
MutexSlack-16 89.4ns ± 1% 98.4ns ± 1% +10.10% (p=0.000 n=18+17)
MutexWork 60.5ns ± 0% 58.2ns ± 3% -3.75% (p=0.000 n=12+20)
MutexWork-4 105ns ± 5% 103ns ± 7% -1.68% (p=0.007 n=20+20)
MutexWork-16 157ns ± 1% 163ns ± 2% +3.90% (p=0.000 n=18+18)
MutexWorkSlack 70.2ns ± 5% 57.7ns ± 1% -17.81% (p=0.000 n=19+20)
MutexWorkSlack-4 277ns ±13% 276ns ±13% ~ (p=0.682 n=20+19)
MutexWorkSlack-16 156ns ± 0% 147ns ± 0% -5.62% (p=0.000 n=16+14)
MutexNoSpin 966ns ± 0% 968ns ± 0% +0.11% (p=0.029 n=15+20)
MutexNoSpin-4 269ns ± 4% 270ns ± 2% ~ (p=0.807 n=20+19)
MutexNoSpin-16 122ns ± 0% 120ns ± 4% -1.63% (p=0.000 n=19+19)
MutexSpin 3.13µs ± 0% 3.13µs ± 1% +0.16% (p=0.004 n=18+20)
MutexSpin-4 826ns ± 1% 832ns ± 2% +0.74% (p=0.000 n=19+16)
MutexSpin-16 397ns ± 1% 395ns ± 0% -0.50% (p=0.000 n=19+17)
RWMutexUncontended 71.4ns ± 0% 69.5ns ± 0% -2.72% (p=0.000 n=16+20)
RWMutexUncontended-4 18.4ns ± 4% 17.5ns ± 0% -4.92% (p=0.000 n=20+18)
RWMutexUncontended-16 8.01ns ± 0% 7.92ns ± 0% -1.15% (p=0.000 n=18+18)
RWMutexWrite100 24.9ns ± 0% 24.9ns ± 1% ~ (p=0.099 n=19+20)
RWMutexWrite100-4 46.5ns ± 3% 46.2ns ± 4% ~ (p=0.253 n=17+19)
RWMutexWrite100-16 68.9ns ± 3% 69.9ns ± 5% +1.46% (p=0.012 n=18+20)
RWMutexWrite10 27.1ns ± 0% 27.0ns ± 2% ~ (p=0.128 n=17+20)
RWMutexWrite10-4 34.8ns ± 1% 34.7ns ± 2% ~ (p=0.180 n=20+18)
RWMutexWrite10-16 37.5ns ± 2% 37.2ns ± 4% -0.89% (p=0.023 n=20+20)
RWMutexWorkWrite100 164ns ± 0% 164ns ± 0% ~ (p=0.106 n=12+20)
RWMutexWorkWrite100-4 186ns ± 3% 193ns ± 3% +3.46% (p=0.000 n=20+20)
RWMutexWorkWrite100-16 204ns ± 2% 210ns ± 3% +2.96% (p=0.000 n=18+20)
RWMutexWorkWrite10 153ns ± 0% 153ns ± 0% -0.20% (p=0.017 n=20+19)
RWMutexWorkWrite10-4 179ns ± 1% 178ns ± 2% ~ (p=0.215 n=19+20)
RWMutexWorkWrite10-16 191ns ± 1% 192ns ± 2% ~ (p=0.166 n=15+19)
linux/amd64 bin/go 14630572 (previous commit 14605947, +24625/+0.17%)
Change-Id: I3f9d1765801fe0b8deb1bc2728b8bba8a7508e23
Reviewed-on: https://go-review.googlesource.com/c/go/+/148958
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/sync')
-rw-r--r-- | src/sync/mutex.go | 12 | ||||
-rw-r--r-- | src/sync/mutex_test.go | 2 | ||||
-rw-r--r-- | src/sync/runtime.go | 4 | ||||
-rw-r--r-- | src/sync/runtime_sema_test.go | 6 | ||||
-rw-r--r-- | src/sync/rwmutex.go | 4 | ||||
-rw-r--r-- | src/sync/waitgroup.go | 2 |
6 files changed, 20 insertions, 10 deletions
diff --git a/src/sync/mutex.go b/src/sync/mutex.go index 4c5582c809..a809993fe0 100644 --- a/src/sync/mutex.go +++ b/src/sync/mutex.go @@ -180,6 +180,14 @@ func (m *Mutex) Unlock() { // Fast path: drop lock bit. new := atomic.AddInt32(&m.state, -mutexLocked) + if new != 0 { + // Outlined slow path to allow inlining the fast path. + // To hide unlockSlow during tracing we skip one extra frame when tracing GoUnblock. + m.unlockSlow(new) + } +} + +func (m *Mutex) unlockSlow(new int32) { if (new+mutexLocked)&mutexLocked == 0 { throw("sync: unlock of unlocked mutex") } @@ -198,7 +206,7 @@ func (m *Mutex) Unlock() { // Grab the right to wake someone. new = (old - 1<<mutexWaiterShift) | mutexWoken if atomic.CompareAndSwapInt32(&m.state, old, new) { - runtime_Semrelease(&m.sema, false) + runtime_Semrelease(&m.sema, false, 1) return } old = m.state @@ -208,6 +216,6 @@ func (m *Mutex) Unlock() { // Note: mutexLocked is not set, the waiter will set it after wakeup. // But mutex is still considered locked if mutexStarving is set, // so new coming goroutines won't acquire it. - runtime_Semrelease(&m.sema, true) + runtime_Semrelease(&m.sema, true, 1) } } diff --git a/src/sync/mutex_test.go b/src/sync/mutex_test.go index 521468439a..e61a853642 100644 --- a/src/sync/mutex_test.go +++ b/src/sync/mutex_test.go @@ -21,7 +21,7 @@ import ( func HammerSemaphore(s *uint32, loops int, cdone chan bool) { for i := 0; i < loops; i++ { Runtime_Semacquire(s) - Runtime_Semrelease(s, false) + Runtime_Semrelease(s, false, 0) } cdone <- true } diff --git a/src/sync/runtime.go b/src/sync/runtime.go index b6b9e480a4..8b20b0f6f7 100644 --- a/src/sync/runtime.go +++ b/src/sync/runtime.go @@ -22,7 +22,9 @@ func runtime_SemacquireMutex(s *uint32, lifo bool) // It is intended as a simple wakeup primitive for use by the synchronization // library and should not be used directly. // If handoff is true, pass count directly to the first waiter. -func runtime_Semrelease(s *uint32, handoff bool) +// skipframes is the number of frames to omit during tracing, counting from +// runtime_Semrelease's caller. +func runtime_Semrelease(s *uint32, handoff bool, skipframes int) // Approximation of notifyList in runtime/sema.go. Size and alignment must // agree. diff --git a/src/sync/runtime_sema_test.go b/src/sync/runtime_sema_test.go index a680847edf..152cf0e94a 100644 --- a/src/sync/runtime_sema_test.go +++ b/src/sync/runtime_sema_test.go @@ -18,7 +18,7 @@ func BenchmarkSemaUncontended(b *testing.B) { b.RunParallel(func(pb *testing.PB) { sem := new(PaddedSem) for pb.Next() { - Runtime_Semrelease(&sem.sem, false) + Runtime_Semrelease(&sem.sem, false, 0) Runtime_Semacquire(&sem.sem) } }) @@ -44,7 +44,7 @@ func benchmarkSema(b *testing.B, block, work bool) { b.RunParallel(func(pb *testing.PB) { foo := 0 for pb.Next() { - Runtime_Semrelease(&sem, false) + Runtime_Semrelease(&sem, false, 0) if work { for i := 0; i < 100; i++ { foo *= 2 @@ -54,7 +54,7 @@ func benchmarkSema(b *testing.B, block, work bool) { Runtime_Semacquire(&sem) } _ = foo - Runtime_Semrelease(&sem, false) + Runtime_Semrelease(&sem, false, 0) }) } diff --git a/src/sync/rwmutex.go b/src/sync/rwmutex.go index 16a2f9227c..24dd78cbe7 100644 --- a/src/sync/rwmutex.go +++ b/src/sync/rwmutex.go @@ -73,7 +73,7 @@ func (rw *RWMutex) RUnlock() { // A writer is pending. if atomic.AddInt32(&rw.readerWait, -1) == 0 { // The last reader unblocks the writer. - runtime_Semrelease(&rw.writerSem, false) + runtime_Semrelease(&rw.writerSem, false, 0) } } if race.Enabled { @@ -125,7 +125,7 @@ func (rw *RWMutex) Unlock() { } // Unblock blocked readers, if any. for i := 0; i < int(r); i++ { - runtime_Semrelease(&rw.readerSem, false) + runtime_Semrelease(&rw.readerSem, false, 0) } // Allow other writers to proceed. rw.w.Unlock() diff --git a/src/sync/waitgroup.go b/src/sync/waitgroup.go index 99dd400006..e81a493dea 100644 --- a/src/sync/waitgroup.go +++ b/src/sync/waitgroup.go @@ -90,7 +90,7 @@ func (wg *WaitGroup) Add(delta int) { // Reset waiters count to 0. *statep = 0 for ; w != 0; w-- { - runtime_Semrelease(semap, false) + runtime_Semrelease(semap, false, 0) } } |