-rw-r--r--  src/runtime/lockrank.go  |  22
-rw-r--r--  src/runtime/proc.go      | 168
-rw-r--r--  src/time/sleep_test.go   | 187
3 files changed, 294 insertions(+), 83 deletions(-)
diff --git a/src/runtime/lockrank.go b/src/runtime/lockrank.go
index 0cbbfc4f45..3f9b087856 100644
--- a/src/runtime/lockrank.go
+++ b/src/runtime/lockrank.go
@@ -41,12 +41,12 @@ const (
lockRankCpuprof
lockRankSweep
+ lockRankPollDesc
lockRankSched
lockRankDeadlock
lockRankPanic
lockRankAllg
lockRankAllp
- lockRankPollDesc
lockRankTimers // Multiple timers locked simultaneously in destroy()
lockRankItab
@@ -120,12 +120,12 @@ var lockNames = []string{
lockRankCpuprof: "cpuprof",
lockRankSweep: "sweep",
+ lockRankPollDesc: "pollDesc",
lockRankSched: "sched",
lockRankDeadlock: "deadlock",
lockRankPanic: "panic",
lockRankAllg: "allg",
lockRankAllp: "allp",
- lockRankPollDesc: "pollDesc",
lockRankTimers: "timers",
lockRankItab: "itab",
@@ -182,14 +182,14 @@ func (rank lockRank) String() string {
return lockNames[rank]
}
-// lockPartialOrder is a partial order among the various lock types, listing the immediate
-// ordering that has actually been observed in the runtime. Each entry (which
-// corresponds to a particular lock rank) specifies the list of locks that can be
-// already be held immediately "above" it.
+// lockPartialOrder is a partial order among the various lock types, listing the
+// immediate ordering that has actually been observed in the runtime. Each entry
+// (which corresponds to a particular lock rank) specifies the list of locks
+// that can already be held immediately "above" it.
//
-// So, for example, the lockRankSched entry shows that all the locks preceding it in
-// rank can actually be held. The fin lock shows that only the sched, timers, or
-// hchan lock can be held immediately above it when it is acquired.
+// So, for example, the lockRankSched entry shows that all the locks preceding
+// it in rank can actually be held. The allp lock shows that only the sysmon or
+// sched lock can be held immediately above it when it is acquired.
var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankDummy: {},
lockRankSysmon: {},
@@ -199,12 +199,12 @@ var lockPartialOrder [][]lockRank = [][]lockRank{
lockRankAssistQueue: {},
lockRankCpuprof: {},
lockRankSweep: {},
- lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep},
+ lockRankPollDesc: {},
+ lockRankSched: {lockRankSysmon, lockRankScavenge, lockRankForcegc, lockRankSweepWaiters, lockRankAssistQueue, lockRankCpuprof, lockRankSweep, lockRankPollDesc},
lockRankDeadlock: {lockRankDeadlock},
lockRankPanic: {lockRankDeadlock},
lockRankAllg: {lockRankSysmon, lockRankSched, lockRankPanic},
lockRankAllp: {lockRankSysmon, lockRankSched},
- lockRankPollDesc: {},
lockRankTimers: {lockRankSysmon, lockRankScavenge, lockRankSched, lockRankAllp, lockRankPollDesc, lockRankTimers},
lockRankItab: {},
lockRankReflectOffs: {lockRankItab},
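This reordering puts pollDesc ahead of sched in the rank order, presumably because the wakep call added to wakeNetPoller below means sched.lock can now be acquired while a pollDesc lock is held (timers are added and modified from the netpoll deadline paths). As a minimal sketch of how a partial-order table of this shape is read, assuming invented ranks and a checkAcquire helper that are not runtime APIs:

package main

import "fmt"

type lockRank int

// A toy subset of ranks; after this change pollDesc orders before sched.
const (
	rankPollDesc lockRank = iota
	rankSched
	rankAllp
)

// partialOrder[r] lists the ranks that may already be held when a lock of
// rank r is acquired, mirroring the shape of lockPartialOrder.
var partialOrder = [][]lockRank{
	rankPollDesc: {},
	rankSched:    {rankPollDesc},
	rankAllp:     {rankSched},
}

// checkAcquire reports whether taking a lock of rank next while holding a
// lock of rank held is allowed by the partial order.
func checkAcquire(held, next lockRank) bool {
	for _, r := range partialOrder[next] {
		if r == held {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(checkAcquire(rankPollDesc, rankSched)) // true: sched may be taken with pollDesc held
	fmt.Println(checkAcquire(rankSched, rankPollDesc)) // false: the reverse order would be a violation
}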
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index ebecc92745..6feecef985 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2264,11 +2264,16 @@ func handoffp(_p_ *p) {
startm(_p_, false)
return
}
- if when := nobarrierWakeTime(_p_); when != 0 {
- wakeNetPoller(when)
- }
+
+ // The scheduler lock cannot be held when calling wakeNetPoller below
+ // because wakeNetPoller may call wakep which may call startm.
+ when := nobarrierWakeTime(_p_)
pidleput(_p_)
unlock(&sched.lock)
+
+ if when != 0 {
+ wakeNetPoller(when)
+ }
}
// Tries to add one more P to execute G's.
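A minimal sketch of the ordering pattern handoffp adopts above: read the wakeup time while sched.lock is held, drop the lock, and only then call the function that may re-acquire it. The mutex and helpers below are stand-ins for sched.lock, nobarrierWakeTime, and wakeNetPoller, not runtime code.

package main

import (
	"fmt"
	"sync"
)

var mu sync.Mutex // stands in for sched.lock

// wake stands in for wakeNetPoller: it may need mu again (wakeNetPoller may
// call wakep, which may call startm, which takes sched.lock), so it must not
// run while mu is held.
func wake(when int64) {
	mu.Lock()
	defer mu.Unlock()
	fmt.Println("waking poller for", when)
}

func handoff(when int64) {
	mu.Lock()
	// ... put the P on the idle list while holding the lock ...
	mu.Unlock()

	// Safe only after the lock is released.
	if when != 0 {
		wake(when)
	}
}

func main() { handoff(42) }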
@@ -2477,40 +2482,33 @@ top:
_g_.m.spinning = true
atomic.Xadd(&sched.nmspinning, 1)
}
- for i := 0; i < 4; i++ {
+ const stealTries = 4
+ for i := 0; i < stealTries; i++ {
+ stealTimersOrRunNextG := i == stealTries-1
+
for enum := stealOrder.start(fastrand()); !enum.done(); enum.next() {
if sched.gcwaiting != 0 {
goto top
}
- stealRunNextG := i > 2 // first look for ready queues with more than 1 g
p2 := allp[enum.position()]
if _p_ == p2 {
continue
}
- // Don't bother to attempt to steal if p2 is idle.
- if !idlepMask.read(enum.position()) {
- if gp := runqsteal(_p_, p2, stealRunNextG); gp != nil {
- return gp, false
- }
- }
-
- // Consider stealing timers from p2.
- // This call to checkTimers is the only place where
- // we hold a lock on a different P's timers.
- // Lock contention can be a problem here, so
- // initially avoid grabbing the lock if p2 is running
- // and is not marked for preemption. If p2 is running
- // and not being preempted we assume it will handle its
- // own timers.
+ // Steal timers from p2. This call to checkTimers is the only place
+ // where we might hold a lock on a different P's timers. We do this
+ // once on the last pass before checking runnext because stealing
+ // from the other P's runnext should be the last resort, so if there
+ // are timers to steal do that first.
//
- // If we're still looking for work after checking all
- // the P's, then go ahead and steal from an active P.
+ // We only check timers on one of the stealing iterations because
+ // the time stored in now doesn't change in this loop and checking
+ // the timers for each P more than once with the same value of now
+ // is probably a waste of time.
//
- // TODO(prattmic): Maintain a global look-aside similar
- // to idlepMask to avoid looking at p2 if it can't
- // possibly have timers.
- if i > 2 || (i > 1 && shouldStealTimers(p2)) {
+ // TODO(prattmic): Maintain a global look-aside similar to idlepMask
+ // to avoid looking at p2 if it can't possibly have timers.
+ if stealTimersOrRunNextG {
tnow, w, ran := checkTimers(p2, now)
now = tnow
if w != 0 && (pollUntil == 0 || w < pollUntil) {
@@ -2531,6 +2529,13 @@ top:
ranTimer = true
}
}
+
+ // Don't bother to attempt to steal if p2 is idle.
+ if !idlepMask.read(enum.position()) {
+ if gp := runqsteal(_p_, p2, stealTimersOrRunNextG); gp != nil {
+ return gp, false
+ }
+ }
}
}
if ranTimer {
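The loop above now makes a fixed number of passes and defers both timer stealing and runnext stealing to the final pass. A schematic, self-contained sketch of that shape; proc, steal, and their fields are invented for illustration and are not runtime types:

package main

import "fmt"

// proc is a stand-in for a P with a run queue, a runnext slot, and some timers.
type proc struct {
	runq    []int
	runnext int
	timers  int
}

const stealTries = 4

func steal(self int, procs []*proc) (work int, ok bool) {
	for i := 0; i < stealTries; i++ {
		// Only the final pass also steals timers and the victim's runnext:
		// runnext is a last resort, and each P's timers need checking at most
		// once for a given value of "now".
		finalPass := i == stealTries-1
		for idx, p2 := range procs {
			if idx == self {
				continue
			}
			if finalPass && p2.timers > 0 {
				p2.timers-- // "run" a stolen timer
			}
			if len(p2.runq) > 0 {
				work, p2.runq = p2.runq[0], p2.runq[1:]
				return work, true
			}
			if finalPass && p2.runnext != 0 {
				work, p2.runnext = p2.runnext, 0
				return work, true
			}
		}
	}
	return 0, false
}

func main() {
	procs := []*proc{{}, {runnext: 7, timers: 1}}
	fmt.Println(steal(0, procs)) // 7 true: found on the final pass
}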
@@ -2606,7 +2611,7 @@ stop:
// drop nmspinning first and then check all per-P queues again (with
// #StoreLoad memory barrier in between). If we do it the other way around,
// another thread can submit a goroutine after we've checked all run queues
- // but before we drop nmspinning; as the result nobody will unpark a thread
+ // but before we drop nmspinning; as a result nobody will unpark a thread
// to run the goroutine.
// If we discover new work below, we need to restore m.spinning as a signal
// for resetspinning to unpark a new worker thread (because there can be more
@@ -2640,6 +2645,35 @@ stop:
}
}
+ // Similar to above, check for timer creation or expiry concurrently with
+ // transitioning from spinning to non-spinning. Note that we cannot use
+ // checkTimers here because it calls adjusttimers which may need to allocate
+ // memory, and that isn't allowed when we don't have an active P.
+ for _, _p_ := range allpSnapshot {
+ // This is similar to nobarrierWakeTime, but minimizes calls to
+ // nanotime.
+ if atomic.Load(&_p_.adjustTimers) > 0 {
+ if now == 0 {
+ now = nanotime()
+ }
+ pollUntil = now
+ } else {
+ w := int64(atomic.Load64(&_p_.timer0When))
+ if w != 0 && (pollUntil == 0 || w < pollUntil) {
+ pollUntil = w
+ }
+ }
+ }
+ if pollUntil != 0 {
+ if now == 0 {
+ now = nanotime()
+ }
+ delta = pollUntil - now
+ if delta < 0 {
+ delta = 0
+ }
+ }
+
// Check for idle-priority GC work again.
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) {
lock(&sched.lock)
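The new block above finds the earliest timer wakeup across the allp snapshot while reading the clock at most once. A small stand-alone sketch of that computation, with plain slices standing in for the per-P adjustTimers and timer0When fields and a counting stub in place of nanotime:

package main

import "fmt"

var clockReads int

// nanotime is a stub that counts how often the clock is consulted.
func nanotime() int64 {
	clockReads++
	return 1_000_000
}

// earliestWakeup mirrors the loop above: a P with pending timer adjustments
// wants to be woken at "now"; otherwise its earliest timer (timer0When) is a
// candidate for pollUntil. The clock is read lazily, at most once.
func earliestWakeup(adjust []bool, timer0When []int64) (pollUntil, now int64) {
	for i := range adjust {
		if adjust[i] {
			if now == 0 {
				now = nanotime()
			}
			pollUntil = now
		} else if w := timer0When[i]; w != 0 && (pollUntil == 0 || w < pollUntil) {
			pollUntil = w
		}
	}
	return pollUntil, now
}

func main() {
	pollUntil, _ := earliestWakeup([]bool{false, false}, []int64{3_000_000, 2_000_000})
	fmt.Println(pollUntil, "clock reads:", clockReads) // 2000000 clock reads: 0
}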
@@ -2735,9 +2769,9 @@ func pollWork() bool {
return false
}
-// wakeNetPoller wakes up the thread sleeping in the network poller,
-// if there is one, and if it isn't going to wake up anyhow before
-// the when argument.
+// wakeNetPoller wakes up the thread sleeping in the network poller if it isn't
+// going to wake up before the when argument; or it wakes an idle P to service
+// timers and the network poller if there isn't one already.
func wakeNetPoller(when int64) {
if atomic.Load64(&sched.lastpoll) == 0 {
// In findrunnable we ensure that when polling the pollUntil
@@ -2748,6 +2782,10 @@ func wakeNetPoller(when int64) {
if pollerPollUntil == 0 || pollerPollUntil > when {
netpollBreak()
}
+ } else {
+ // There are no threads in the network poller; try to get
+ // one there so it can handle new timers.
+ wakep()
}
}
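A compact sketch of the wakeNetPoller decision after this change, with the scheduler state passed in as plain values and netpollBreak/wakep stubbed so the two branches are easy to see; this is an illustration, not the runtime's code:

package main

import "fmt"

func netpollBreak() { fmt.Println("interrupt the sleeping poller") }
func wakep()        { fmt.Println("wake an idle P to poll and run timers") }

// lastpoll == 0 means some thread is currently blocked in the network poller.
func wakeNetPollerSketch(when, lastpoll, pollerPollUntil int64) {
	if lastpoll == 0 {
		// Interrupt the poller only if it would otherwise sleep past the new
		// timer's deadline.
		if pollerPollUntil == 0 || pollerPollUntil > when {
			netpollBreak()
		}
	} else {
		// Nobody is polling; try to start an idle P so the new timer is not
		// left waiting for a busy P to notice it.
		wakep()
	}
}

func main() {
	wakeNetPollerSketch(100, 0, 200) // poller would sleep too long: break it
	wakeNetPollerSketch(100, 1, 0)   // nobody polling: wake an idle P
}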
@@ -3034,25 +3072,6 @@ func checkTimers(pp *p, now int64) (rnow, pollUntil int64, ran bool) {
return rnow, pollUntil, ran
}
-// shouldStealTimers reports whether we should try stealing the timers from p2.
-// We don't steal timers from a running P that is not marked for preemption,
-// on the assumption that it will run its own timers. This reduces
-// contention on the timers lock.
-func shouldStealTimers(p2 *p) bool {
- if p2.status != _Prunning {
- return true
- }
- mp := p2.m.ptr()
- if mp == nil || mp.locks > 0 {
- return false
- }
- gp := mp.curg
- if gp == nil || gp.atomicstatus != _Grunning || !gp.preempt {
- return false
- }
- return true
-}
-
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
unlock((*mutex)(lock))
return true
@@ -4603,7 +4622,7 @@ func procresize(nprocs int32) *p {
}
sched.procresizetime = now
- maskWords := (nprocs+31) / 32
+ maskWords := (nprocs + 31) / 32
// Grow allp if necessary.
if nprocs > int32(len(allp)) {
@@ -4927,11 +4946,28 @@ func sysmon() {
}
usleep(delay)
mDoFixup()
+
+ // sysmon should not enter deep sleep if schedtrace is enabled so that
+ // it can print that information at the right time.
+ //
+ // It should also not enter deep sleep if there are any active P's so
+ // that it can retake P's from syscalls, preempt long running G's, and
+ // poll the network if all P's are busy for long stretches.
+ //
+ // It should wake up from deep sleep if any P's become active, either due
+ // to exiting a syscall or waking up due to a timer expiring, so that it
+ // can resume performing those duties. If it wakes from a syscall it
+ // resets idle and delay as a bet that since it had retaken a P from a
+ // syscall before, it may need to do it again shortly after the
+ // application starts work again. It does not reset idle when waking
+ // from a timer to avoid adding system load to applications that spend
+ // most of their time sleeping.
now := nanotime()
- next, _ := timeSleepUntil()
if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
lock(&sched.lock)
if atomic.Load(&sched.gcwaiting) != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs) {
+ syscallWake := false
+ next, _ := timeSleepUntil()
if next > now {
atomic.Store(&sched.sysmonwait, 1)
unlock(&sched.lock)
@@ -4945,33 +4981,27 @@ func sysmon() {
if shouldRelax {
osRelax(true)
}
- notetsleep(&sched.sysmonnote, sleep)
+ syscallWake = notetsleep(&sched.sysmonnote, sleep)
mDoFixup()
if shouldRelax {
osRelax(false)
}
- now = nanotime()
- next, _ = timeSleepUntil()
lock(&sched.lock)
atomic.Store(&sched.sysmonwait, 0)
noteclear(&sched.sysmonnote)
}
- idle = 0
- delay = 20
+ if syscallWake {
+ idle = 0
+ delay = 20
+ }
}
unlock(&sched.lock)
}
+
lock(&sched.sysmonlock)
- {
- // If we spent a long time blocked on sysmonlock
- // then we want to update now and next since it's
- // likely stale.
- now1 := nanotime()
- if now1-now > 50*1000 /* 50µs */ {
- next, _ = timeSleepUntil()
- }
- now = now1
- }
+ // Update now in case we blocked on sysmonnote or spent a long time
+ // blocked on schedlock or sysmonlock above.
+ now = nanotime()
// trigger libc interceptors if needed
if *cgo_yield != nil {
@@ -4996,12 +5026,6 @@ func sysmon() {
}
}
mDoFixup()
- if next < now {
- // There are timers that should have already run,
- // perhaps because there is an unpreemptible P.
- // Try to start an M to run them.
- startm(nil, false)
- }
if atomic.Load(&scavenge.sysmonWake) != 0 {
// Kick the scavenger awake if someone requested it.
wakeScavenger()
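Sketch of the sysmon backoff the comments above describe: the poll delay grows toward 10ms while everything stays idle, and only a wakeup attributed to a syscall resets it. nextDelay is an invented model of that policy (the 20us start, doubling after 50 idle cycles, and 10ms cap match sysmon's existing loop), not the runtime's code:

package main

import "fmt"

// nextDelay returns the updated idle counter and sleep delay (in microseconds)
// for one sysmon cycle. syscallWake models notetsleep reporting that the sleep
// was cut short by a wakeup rather than timing out.
func nextDelay(idle, delay int, syscallWake bool) (int, int) {
	if idle == 0 {
		delay = 20 // start at 20us
	} else if idle > 50 {
		delay *= 2 // back off once idle for a while
	}
	if delay > 10*1000 {
		delay = 10 * 1000 // cap at 10ms
	}
	if syscallWake {
		// A P was just retaken from a syscall; bet that it will happen again
		// soon and go back to fast polling.
		return 0, 20
	}
	return idle + 1, delay
}

func main() {
	idle, delay := 0, 20
	for i := 0; i < 60; i++ {
		idle, delay = nextDelay(idle, delay, false)
	}
	fmt.Println(idle, delay)                  // backoff has grown to the cap
	fmt.Println(nextDelay(idle, delay, true)) // a syscall wake resets it
}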
diff --git a/src/time/sleep_test.go b/src/time/sleep_test.go
index f5678020b9..ba0016bf49 100644
--- a/src/time/sleep_test.go
+++ b/src/time/sleep_test.go
@@ -501,3 +501,190 @@ func TestZeroTimerStopPanics(t *testing.T) {
var tr Timer
tr.Stop()
}
+
+// Benchmark timer latency when the thread that creates the timer is busy with
+// other work and the timers must be serviced by other threads.
+// https://golang.org/issue/38860
+func BenchmarkParallelTimerLatency(b *testing.B) {
+ gmp := runtime.GOMAXPROCS(0)
+ if gmp < 2 || runtime.NumCPU() < gmp {
+ b.Skip("skipping with GOMAXPROCS < 2 or NumCPU < GOMAXPROCS")
+ }
+
+ // allocate memory now to avoid GC interference later.
+ timerCount := gmp - 1
+ stats := make([]struct {
+ sum float64
+ max Duration
+ count int64
+ _ [5]int64 // cache line padding
+ }, timerCount)
+
+ // Ensure the time to start new threads to service timers will not pollute
+ // the results.
+ warmupScheduler(gmp)
+
+ // Note that, other than the AfterFunc calls this benchmark is measuring, it
+ // avoids using any other timers. In particular, the main goroutine uses
+ // doWork to spin for some durations because, up through Go 1.15, if all
+ // threads are idle, sysmon could enter deep sleep and have to wake when we do.
+
+ // Ensure sysmon is in deep sleep.
+ doWork(30 * Millisecond)
+
+ b.ResetTimer()
+
+ const delay = Millisecond
+ var wg sync.WaitGroup
+ var count int32
+ for i := 0; i < b.N; i++ {
+ wg.Add(timerCount)
+ atomic.StoreInt32(&count, 0)
+ for j := 0; j < timerCount; j++ {
+ j := j
+ expectedWakeup := Now().Add(delay)
+ AfterFunc(delay, func() {
+ late := Since(expectedWakeup)
+ if late < 0 {
+ late = 0
+ }
+ stats[j].count++
+ stats[j].sum += float64(late.Nanoseconds())
+ if late > stats[j].max {
+ stats[j].max = late
+ }
+ atomic.AddInt32(&count, 1)
+ for atomic.LoadInt32(&count) < int32(timerCount) {
+ // spin until all timers fired
+ }
+ wg.Done()
+ })
+ }
+
+ for atomic.LoadInt32(&count) < int32(timerCount) {
+ // spin until all timers fired
+ }
+ wg.Wait()
+
+ // Spin for a bit to let the other scheduler threads go idle before the
+ // next round.
+ doWork(Millisecond)
+ }
+ var total float64
+ var samples float64
+ max := Duration(0)
+ for _, s := range stats {
+ if s.max > max {
+ max = s.max
+ }
+ total += s.sum
+ samples += float64(s.count)
+ }
+ b.ReportMetric(0, "ns/op")
+ b.ReportMetric(total/samples, "avg-late-ns")
+ b.ReportMetric(float64(max.Nanoseconds()), "max-late-ns")
+}
+
+// Benchmark timer latency with staggered wakeup times and varying CPU-bound
+// workloads. https://golang.org/issue/38860
+func BenchmarkStaggeredTickerLatency(b *testing.B) {
+ gmp := runtime.GOMAXPROCS(0)
+ if gmp < 2 || runtime.NumCPU() < gmp {
+ b.Skip("skipping with GOMAXPROCS < 2 or NumCPU < GOMAXPROCS")
+ }
+
+ const delay = 3 * Millisecond
+
+ for _, dur := range []Duration{300 * Microsecond, 2 * Millisecond} {
+ b.Run(fmt.Sprintf("work-dur=%s", dur), func(b *testing.B) {
+ for tickersPerP := 1; tickersPerP < int(delay/dur)+1; tickersPerP++ {
+ tickerCount := gmp * tickersPerP
+ b.Run(fmt.Sprintf("tickers-per-P=%d", tickersPerP), func(b *testing.B) {
+ // allocate memory now to avoid GC interference later.
+ stats := make([]struct {
+ sum float64
+ max Duration
+ count int64
+ _ [5]int64 // cache line padding
+ }, tickerCount)
+
+ // Ensure the time to start new threads to service timers
+ // will not pollute the results.
+ warmupScheduler(gmp)
+
+ b.ResetTimer()
+
+ var wg sync.WaitGroup
+ wg.Add(tickerCount)
+ for j := 0; j < tickerCount; j++ {
+ j := j
+ doWork(delay / Duration(gmp))
+ expectedWakeup := Now().Add(delay)
+ ticker := NewTicker(delay)
+ go func(c int, ticker *Ticker, firstWake Time) {
+ defer ticker.Stop()
+
+ for ; c > 0; c-- {
+ <-ticker.C
+ late := Since(expectedWakeup)
+ if late < 0 {
+ late = 0
+ }
+ stats[j].count++
+ stats[j].sum += float64(late.Nanoseconds())
+ if late > stats[j].max {
+ stats[j].max = late
+ }
+ expectedWakeup = expectedWakeup.Add(delay)
+ doWork(dur)
+ }
+ wg.Done()
+ }(b.N, ticker, expectedWakeup)
+ }
+ wg.Wait()
+
+ var total float64
+ var samples float64
+ max := Duration(0)
+ for _, s := range stats {
+ if s.max > max {
+ max = s.max
+ }
+ total += s.sum
+ samples += float64(s.count)
+ }
+ b.ReportMetric(0, "ns/op")
+ b.ReportMetric(total/samples, "avg-late-ns")
+ b.ReportMetric(float64(max.Nanoseconds()), "max-late-ns")
+ })
+ }
+ })
+ }
+}
+
+// warmupScheduler ensures the scheduler has at least targetThreadCount threads
+// in its thread pool.
+func warmupScheduler(targetThreadCount int) {
+ var wg sync.WaitGroup
+ var count int32
+ for i := 0; i < targetThreadCount; i++ {
+ wg.Add(1)
+ go func() {
+ atomic.AddInt32(&count, 1)
+ for atomic.LoadInt32(&count) < int32(targetThreadCount) {
+ // spin until all threads started
+ }
+
+ // spin a bit more to ensure they are all running on separate CPUs.
+ doWork(Millisecond)
+ wg.Done()
+ }()
+ }
+ wg.Wait()
+}
+
+func doWork(dur Duration) {
+ start := Now()
+ for Since(start) < dur {
+ }
+}