author    Dmitry Vyukov <dvyukov@google.com>  2016-04-05 15:29:14 +0200
committer Dmitry Vyukov <dvyukov@google.com>  2016-04-23 15:57:05 +0000
commit    a3703618eadeb74b60f2cb9a23fabe178d4b141d (patch)
tree      f582d339b698327a75464870224644150c893ab9 /src/runtime/trace.go
parent    ba966f5d891191450f286859c35bf8a7fa49cde2 (diff)
runtime: use per-goroutine sequence numbers in tracer
Currently the tracer uses a global sequencer, which introduces a
significant slowdown on parallel machines (up to 10x). Replace the
global sequencer with a per-goroutine sequencer.

If we assign per-goroutine sequence numbers to only 3 types of events
(start, unblock and syscall exit), that is enough to restore a
consistent partial ordering of all events. Even these events don't
need sequence numbers all the time (if a goroutine starts on the same
P where it was unblocked, the start does not need a sequence number).
The burden of restoring the order is put on the trace parser.
Details of the algorithm are described in the comments.

On the http benchmark with GOMAXPROCS=48:
no tracing:        5026 ns/op
tracing:          27803 ns/op (+453%)
with this change:  6369 ns/op (+26%, mostly for traceback)

Trace size is also reduced by ~22%: the average event size drops from
4.63 bytes/event to 3.62 bytes/event.

Besides running the trace tests, I've also tested with manually broken
cputicks (random skew for each event, per-P skew, and episodic random
skew). In all cases the broken timestamps were detected and there were
no test failures.

Change-Id: I078bde421ccc386a66f6c2051ab207bcd5613efa
Reviewed-on: https://go-review.googlesource.com/21512
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
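To make the parser-side algorithm concrete before the diff, here is a
minimal, self-contained Go sketch. This is not the runtime's or the
trace parser's actual code; event, batches, and nextSeq are invented
names for illustration. The point it shows: events within one P's
batch are reliably ordered (one CPU, monotonic ticks), timestamps
across batches are not to be trusted, so the parser emits a batch's
head event only once its per-goroutine seq is the next expected value
for that goroutine.

// ordering_sketch.go: toy model of restoring event order from per-P
// batches using per-goroutine sequence numbers.
package main

import "fmt"

// event is an invented stand-in for a decoded trace event.
type event struct {
	p    int    // P whose batch the event came from
	goid uint64 // goroutine the event is about
	seq  uint64 // per-goroutine sequence number; 0 = event type carries none
	name string
}

func main() {
	// One batch per P. Within a batch the order is trustworthy;
	// across batches the timestamps may be skewed.
	batches := map[int][]event{
		0: {{p: 0, goid: 7, seq: 2, name: "GoStart"}},
		1: {
			{p: 1, goid: 7, seq: 1, name: "GoUnblock"},
			{p: 1, goid: 9, seq: 1, name: "GoStart"},
		},
	}
	nextSeq := map[uint64]uint64{7: 1, 9: 1} // next expected seq per goroutine

	// Repeatedly scan batch heads; emit a head only when its seq matches
	// the next expected value for its goroutine. g7's GoStart in batch 0
	// therefore waits until g7's GoUnblock from batch 1 has been emitted,
	// regardless of what the raw timestamps claim.
	for remaining := 3; remaining > 0; {
		for p := 0; p < 2; p++ {
			b := batches[p]
			if len(b) == 0 {
				continue
			}
			ev := b[0]
			if ev.seq != 0 && ev.seq != nextSeq[ev.goid] {
				continue // an earlier event for this goroutine is in another batch
			}
			if ev.seq != 0 {
				nextSeq[ev.goid] = ev.seq + 1
			}
			fmt.Printf("p%d g%d %s (seq %d)\n", ev.p, ev.goid, ev.name, ev.seq)
			batches[p] = b[1:]
			remaining--
		}
	}
	// Output:
	// p1 g7 GoUnblock (seq 1)
	// p0 g7 GoStart (seq 2)
	// p1 g9 GoStart (seq 1)
}

The sketch also suggests why the new *Local event variants in the diff
can omit seq entirely: when a goroutine's previous sequenced event was
recorded on the same P (tracked via gp.tracelastp below), the batch's
internal ordering already fixes the relative order.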
Diffstat (limited to 'src/runtime/trace.go')
-rw-r--r--  src/runtime/trace.go  121
1 file changed, 67 insertions(+), 54 deletions(-)
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 06fbdfac94..092f941f0c 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -13,7 +13,6 @@
package runtime
import (
- "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -27,21 +26,21 @@ const (
traceEvGomaxprocs = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
traceEvProcStart = 5 // start of P [timestamp, thread id]
traceEvProcStop = 6 // stop of P [timestamp]
- traceEvGCStart = 7 // GC start [timestamp, stack id]
+ traceEvGCStart = 7 // GC start [timestamp, seq, stack id]
traceEvGCDone = 8 // GC done [timestamp]
traceEvGCScanStart = 9 // GC scan start [timestamp]
traceEvGCScanDone = 10 // GC scan done [timestamp]
traceEvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
traceEvGCSweepDone = 12 // GC sweep done [timestamp]
traceEvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
- traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id]
+ traceEvGoStart = 14 // goroutine starts running [timestamp, goroutine id, seq]
traceEvGoEnd = 15 // goroutine ends [timestamp]
traceEvGoStop = 16 // goroutine stops (like in select{}) [timestamp, stack]
traceEvGoSched = 17 // goroutine calls Gosched [timestamp, stack]
traceEvGoPreempt = 18 // goroutine is preempted [timestamp, stack]
traceEvGoSleep = 19 // goroutine calls Sleep [timestamp, stack]
traceEvGoBlock = 20 // goroutine blocks [timestamp, stack]
- traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, stack]
+ traceEvGoUnblock = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
traceEvGoBlockSend = 22 // goroutine blocks on chan send [timestamp, stack]
traceEvGoBlockRecv = 23 // goroutine blocks on chan recv [timestamp, stack]
traceEvGoBlockSelect = 24 // goroutine blocks on select [timestamp, stack]
@@ -49,7 +48,7 @@ const (
traceEvGoBlockCond = 26 // goroutine blocks on Cond [timestamp, stack]
traceEvGoBlockNet = 27 // goroutine blocks on network [timestamp, stack]
traceEvGoSysCall = 28 // syscall enter [timestamp, stack]
- traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, real timestamp]
+ traceEvGoSysExit = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
traceEvGoSysBlock = 30 // syscall blocks [timestamp]
traceEvGoWaiting = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
traceEvGoInSyscall = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
@@ -58,7 +57,10 @@ const (
traceEvTimerGoroutine = 35 // denotes timer goroutine [timer goroutine id]
traceEvFutileWakeup = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
traceEvString = 37 // string dictionary entry [ID, length, string]
- traceEvCount = 38
+ traceEvGoStartLocal = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+ traceEvGoUnblockLocal = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+ traceEvGoSysExitLocal = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+ traceEvCount = 41
)
const (
@@ -105,6 +107,7 @@ var trace struct {
ticksEnd int64 // cputicks when tracing was stopped
timeStart int64 // nanotime when tracing was started
timeEnd int64 // nanotime when tracing was stopped
+ seqGC uint64 // GC start/done sequencer
reading traceBufPtr // buffer currently handed off to user
empty traceBufPtr // stack of empty buffers
fullHead traceBufPtr // queue of full buffers
@@ -122,31 +125,9 @@ var trace struct {
buf traceBufPtr // global trace buffer, used when running without a p
}
-var traceseq uint64 // global trace sequence number
-
-// tracestamp returns a consistent sequence number, time stamp pair
-// for use in a trace. We need to make sure that time stamp ordering
-// (assuming synchronized CPUs) and sequence ordering match.
-// To do that, we increment traceseq, grab ticks, and increment traceseq again.
-// We treat odd traceseq as a sign that another thread is in the middle
-// of the sequence and spin until it is done.
-// Not splitting stack to avoid preemption, just in case the call sites
-// that used to call xadd64 and cputicks are sensitive to that.
-//go:nosplit
-func tracestamp() (seq uint64, ts int64) {
- seq = atomic.Load64(&traceseq)
- for seq&1 != 0 || !atomic.Cas64(&traceseq, seq, seq+1) {
- seq = atomic.Load64(&traceseq)
- }
- ts = cputicks()
- atomic.Store64(&traceseq, seq+2)
- return seq >> 1, ts
-}
-
// traceBufHeader is per-P tracing buffer.
type traceBufHeader struct {
link traceBufPtr // in trace.empty/full
- lastSeq uint64 // sequence number of last event
lastTicks uint64 // when we wrote the last event
pos int // next write offset in arr
stk [traceStackSize]uintptr // scratch buffer for traceback
@@ -194,13 +175,6 @@ func StartTrace() error {
return errorString("tracing is already enabled")
}
- trace.seqStart, trace.ticksStart = tracestamp()
- trace.timeStart = nanotime()
- trace.headerWritten = false
- trace.footerWritten = false
- trace.strings = make(map[string]uint64)
- trace.stringSeq = 0
-
// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
// That would lead to an inconsistent trace:
@@ -213,12 +187,15 @@ func StartTrace() error {
for _, gp := range allgs {
status := readgstatus(gp)
if status != _Gdead {
- traceGoCreate(gp, gp.startpc)
+ traceGoCreate(gp, gp.startpc) // also resets gp.traceseq/tracelastp
}
if status == _Gwaiting {
+ // traceEvGoWaiting is implied to have seq=1.
+ gp.traceseq++
traceEvent(traceEvGoWaiting, -1, uint64(gp.goid))
}
if status == _Gsyscall {
+ gp.traceseq++
traceEvent(traceEvGoInSyscall, -1, uint64(gp.goid))
} else {
gp.sysblocktraced = false
@@ -226,6 +203,17 @@ func StartTrace() error {
}
traceProcStart()
traceGoStart()
+ // Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
+ // If we do it the other way around, it is possible that exitsyscall will
+ // query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp.
+ // It will lead to a false conclusion that cputicks is broken.
+ trace.ticksStart = cputicks()
+ trace.timeStart = nanotime()
+ trace.headerWritten = false
+ trace.footerWritten = false
+ trace.strings = make(map[string]uint64)
+ trace.stringSeq = 0
+ trace.seqGC = 0
_g_.m.startingtrace = false
trace.enabled = true
@@ -382,11 +370,9 @@ func ReadTrace() []byte {
var data []byte
data = append(data, traceEvFrequency|0<<traceArgCountShift)
data = traceAppend(data, uint64(freq))
- data = traceAppend(data, 0)
if timers.gp != nil {
data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift)
data = traceAppend(data, uint64(timers.gp.goid))
- data = traceAppend(data, 0)
}
// This will emit a bunch of full buffers, we will pick them up
// on the next iteration.
@@ -494,19 +480,14 @@ func traceEvent(ev byte, skip int, args ...uint64) {
(*bufp).set(buf)
}
- seq, ticksraw := tracestamp()
- seqDiff := seq - buf.lastSeq
- ticks := uint64(ticksraw) / traceTickDiv
+ ticks := uint64(cputicks()) / traceTickDiv
tickDiff := ticks - buf.lastTicks
if buf.pos == 0 {
buf.byte(traceEvBatch | 1<<traceArgCountShift)
buf.varint(uint64(pid))
- buf.varint(seq)
buf.varint(ticks)
- seqDiff = 0
tickDiff = 0
}
- buf.lastSeq = seq
buf.lastTicks = ticks
narg := byte(len(args))
if skip >= 0 {
@@ -525,7 +506,6 @@ func traceEvent(ev byte, skip int, args ...uint64) {
buf.varint(0)
lenp = &buf.arr[buf.pos-1]
}
- buf.varint(seqDiff)
buf.varint(tickDiff)
for _, a := range args {
buf.varint(a)
@@ -892,7 +872,8 @@ func traceProcStop(pp *p) {
}
func traceGCStart() {
- traceEvent(traceEvGCStart, 3)
+ traceEvent(traceEvGCStart, 3, trace.seqGC)
+ trace.seqGC++
}
func traceGCDone() {
@@ -916,13 +897,23 @@ func traceGCSweepDone() {
}
func traceGoCreate(newg *g, pc uintptr) {
+ newg.traceseq = 0
+ newg.tracelastp = getg().m.p
// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
id := trace.stackTab.put([]uintptr{pc + sys.PCQuantum})
traceEvent(traceEvGoCreate, 2, uint64(newg.goid), uint64(id))
}
func traceGoStart() {
- traceEvent(traceEvGoStart, -1, uint64(getg().m.curg.goid))
+ _g_ := getg().m.curg
+ _p_ := _g_.m.p
+ _g_.traceseq++
+ if _g_.tracelastp == _p_ {
+ traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid))
+ } else {
+ _g_.tracelastp = _p_
+ traceEvent(traceEvGoStart, -1, uint64(_g_.goid), _g_.traceseq)
+ }
}
func traceGoEnd() {
@@ -930,10 +921,14 @@ func traceGoEnd() {
}
func traceGoSched() {
+ _g_ := getg()
+ _g_.tracelastp = _g_.m.p
traceEvent(traceEvGoSched, 1)
}
func traceGoPreempt() {
+ _g_ := getg()
+ _g_.tracelastp = _g_.m.p
traceEvent(traceEvGoPreempt, 1)
}
@@ -945,19 +940,37 @@ func traceGoPark(traceEv byte, skip int, gp *g) {
}
func traceGoUnpark(gp *g, skip int) {
- traceEvent(traceEvGoUnblock, skip, uint64(gp.goid))
+ _p_ := getg().m.p
+ gp.traceseq++
+ if gp.tracelastp == _p_ {
+ traceEvent(traceEvGoUnblockLocal, skip, uint64(gp.goid))
+ } else {
+ gp.tracelastp = _p_
+ traceEvent(traceEvGoUnblock, skip, uint64(gp.goid), gp.traceseq)
+ }
}
func traceGoSysCall() {
traceEvent(traceEvGoSysCall, 1)
}
-func traceGoSysExit(seq uint64, ts int64) {
- if int64(seq)-int64(trace.seqStart) < 0 {
- // The timestamp was obtained during a previous tracing session, ignore.
- return
- }
- traceEvent(traceEvGoSysExit, -1, uint64(getg().m.curg.goid), seq, uint64(ts)/traceTickDiv)
+func traceGoSysExit(ts int64) {
+ if ts != 0 && ts < trace.ticksStart {
+ // There is a race between the code that initializes sysexitticks
+ // (in exitsyscall, which runs without a P, and therefore is not
+ // stopped with the rest of the world) and the code that initializes
+ // a new trace. The recorded sysexitticks must therefore be treated
+ // as "best effort". If they are valid for this trace, then great,
+ // use them for greater accuracy. But if they're not valid for this
+ // trace, assume that the trace was started after the actual syscall
+ // exit (but before we actually managed to start the goroutine,
+ // aka right now), and assign a fresh time stamp to keep the log consistent.
+ ts = 0
+ }
+ _g_ := getg().m.curg
+ _g_.traceseq++
+ _g_.tracelastp = _g_.m.p
+ traceEvent(traceEvGoSysExit, -1, uint64(_g_.goid), _g_.traceseq, uint64(ts)/traceTickDiv)
}
func traceGoSysBlock(pp *p) {