diff options
author | Austin Clements <austin@google.com> | 2020-04-16 08:13:58 -0400 |
---|---|---|
committer | Austin Clements <austin@google.com> | 2020-04-29 21:29:21 +0000 |
commit | 9d812cfa5cbb1f573d61c452c864072270526753 (patch) | |
tree | bf2731ef44747066f0482780f2cb336cbb6b4f71 | |
parent | ee7d9f1c37c768b859c27dec2a897b12fa928c4e (diff) | |
download | go-9d812cfa5cbb1f573d61c452c864072270526753.tar.gz go-9d812cfa5cbb1f573d61c452c864072270526753.zip |
cmd/compile,runtime: stack maps only at calls, remove register maps
Currently, we emit stack maps and register maps at almost every
instruction. This was originally intended to support non-cooperative
preemption, but was only ever used for debug call injection. Now debug
call injection also uses conservative frame scanning. As a result,
stack maps are only needed at call sites and register maps aren't
needed at all except that we happen to also encode unsafe-point
information in the register map PCDATA stream.
This CL reduces stack maps to only appear at calls, and replace full
register maps with just safe/unsafe-point information.
This is all protected by the go115ReduceLiveness feature flag, which
is defined in both runtime and cmd/compile.
This CL significantly reduces binary sizes and also speeds up compiles
and links:
name old exe-bytes new exe-bytes delta
BinGoSize 15.0MB ± 0% 14.1MB ± 0% -5.72%
name old pcln-bytes new pcln-bytes delta
BinGoSize 3.14MB ± 0% 2.48MB ± 0% -21.08%
name old time/op new time/op delta
Template 178ms ± 7% 172ms ±14% -3.59% (p=0.005 n=19+19)
Unicode 71.0ms ±12% 69.8ms ±10% ~ (p=0.126 n=18+18)
GoTypes 655ms ± 8% 615ms ± 8% -6.11% (p=0.000 n=19+19)
Compiler 3.27s ± 6% 3.15s ± 7% -3.69% (p=0.001 n=20+20)
SSA 7.10s ± 5% 6.85s ± 8% -3.53% (p=0.001 n=19+20)
Flate 124ms ±15% 116ms ±22% -6.57% (p=0.024 n=18+19)
GoParser 156ms ±26% 147ms ±34% ~ (p=0.070 n=19+19)
Reflect 406ms ± 9% 387ms ±21% -4.69% (p=0.028 n=19+20)
Tar 163ms ±15% 162ms ±27% ~ (p=0.370 n=19+19)
XML 223ms ±13% 218ms ±14% ~ (p=0.157 n=20+20)
LinkCompiler 503ms ±21% 484ms ±23% ~ (p=0.072 n=20+20)
ExternalLinkCompiler 1.27s ± 7% 1.22s ± 8% -3.85% (p=0.005 n=20+19)
LinkWithoutDebugCompiler 294ms ±17% 273ms ±11% -7.16% (p=0.001 n=19+18)
(https://perf.golang.org/search?q=upload:20200428.8)
The binary size improvement is even slightly better when you include
the CLs leading up to this. Relative to the parent of "cmd/compile:
mark PanicBounds/Extend as calls":
name old exe-bytes new exe-bytes delta
BinGoSize 15.0MB ± 0% 14.1MB ± 0% -6.18%
name old pcln-bytes new pcln-bytes delta
BinGoSize 3.22MB ± 0% 2.48MB ± 0% -22.92%
(https://perf.golang.org/search?q=upload:20200428.9)
For #36365.
Change-Id: I69448e714f2a44430067ca97f6b78e08c0abed27
Reviewed-on: https://go-review.googlesource.com/c/go/+/230544
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r-- | src/cmd/compile/internal/gc/gsubr.go | 44 | ||||
-rw-r--r-- | src/cmd/compile/internal/gc/plive.go | 119 | ||||
-rw-r--r-- | src/cmd/internal/obj/link.go | 2 | ||||
-rw-r--r-- | src/cmd/internal/objabi/funcdata.go | 11 | ||||
-rw-r--r-- | src/runtime/debugcall.go | 40 | ||||
-rw-r--r-- | src/runtime/preempt.go | 25 | ||||
-rw-r--r-- | src/runtime/symtab.go | 11 |
7 files changed, 178 insertions, 74 deletions
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go index 5a7d4c9e4d..bb1393ae6a 100644 --- a/src/cmd/compile/internal/gc/gsubr.go +++ b/src/cmd/compile/internal/gc/gsubr.go @@ -70,9 +70,13 @@ func newProgs(fn *Node, worker int) *Progs { pp.pos = fn.Pos pp.settext(fn) - pp.nextLive = LivenessInvalid // PCDATA tables implicitly start with index -1. pp.prevLive = LivenessIndex{-1, -1, false} + if go115ReduceLiveness { + pp.nextLive = pp.prevLive + } else { + pp.nextLive = LivenessInvalid + } return pp } @@ -117,18 +121,32 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog { Addrconst(&p.From, objabi.PCDATA_StackMapIndex) Addrconst(&p.To, int64(idx)) } - if pp.nextLive.isUnsafePoint { - // Unsafe points are encoded as a special value in the - // register map. - pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe - } - if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex { - // Emit register map index change. - idx := pp.nextLive.regMapIndex - pp.prevLive.regMapIndex = idx - p := pp.Prog(obj.APCDATA) - Addrconst(&p.From, objabi.PCDATA_RegMapIndex) - Addrconst(&p.To, int64(idx)) + if !go115ReduceLiveness { + if pp.nextLive.isUnsafePoint { + // Unsafe points are encoded as a special value in the + // register map. + pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe + } + if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex { + // Emit register map index change. + idx := pp.nextLive.regMapIndex + pp.prevLive.regMapIndex = idx + p := pp.Prog(obj.APCDATA) + Addrconst(&p.From, objabi.PCDATA_RegMapIndex) + Addrconst(&p.To, int64(idx)) + } + } else { + if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint { + // Emit unsafe-point marker. + pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint + p := pp.Prog(obj.APCDATA) + Addrconst(&p.From, objabi.PCDATA_UnsafePoint) + if pp.nextLive.isUnsafePoint { + Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe) + } else { + Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe) + } + } } p := pp.next diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go index f8ccdd2369..a4c051bda6 100644 --- a/src/cmd/compile/internal/gc/plive.go +++ b/src/cmd/compile/internal/gc/plive.go @@ -24,6 +24,16 @@ import ( "strings" ) +// go115ReduceLiveness disables register maps and only produces stack +// maps at call sites. +// +// In Go 1.15, we changed debug call injection to use conservative +// scanning instead of precise pointer maps, so these are no longer +// necessary. +// +// Keep in sync with runtime/preempt.go:go115ReduceLiveness. +const go115ReduceLiveness = true + // OpVarDef is an annotation for the liveness analysis, marking a place // where a complete initialization (definition) of a variable begins. // Since the liveness analysis can see initialization of single-word @@ -165,18 +175,27 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) { } func (m LivenessMap) Get(v *ssa.Value) LivenessIndex { - // All safe-points are in the map, so if v isn't in - // the map, it's an unsafe-point. + if !go115ReduceLiveness { + // All safe-points are in the map, so if v isn't in + // the map, it's an unsafe-point. + if idx, ok := m.vals[v.ID]; ok { + return idx + } + return LivenessInvalid + } + + // If v isn't in the map, then it's a "don't care" and not an + // unsafe-point. if idx, ok := m.vals[v.ID]; ok { return idx } - return LivenessInvalid + return LivenessIndex{StackMapDontCare, StackMapDontCare, false} } // LivenessIndex stores the liveness map information for a Value. type LivenessIndex struct { stackMapIndex int - regMapIndex int + regMapIndex int // only for !go115ReduceLiveness // isUnsafePoint indicates that this is an unsafe-point. // @@ -188,7 +207,7 @@ type LivenessIndex struct { } // LivenessInvalid indicates an unsafe point with no stack map. -var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} +var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness // StackMapDontCare indicates that the stack map index at a Value // doesn't matter. @@ -392,6 +411,9 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) { // regEffects returns the registers affected by v. func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) { + if go115ReduceLiveness { + return 0, 0 + } if v.Op == ssa.OpPhi { // All phi node arguments must come from the same // register and the result must also go to that @@ -473,7 +495,7 @@ func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) { return uevar, kill } -type liveRegMask uint32 +type liveRegMask uint32 // only if !go115ReduceLiveness func (m liveRegMask) niceString(config *ssa.Config) string { if m == 0 { @@ -835,7 +857,7 @@ func (lv *Liveness) hasStackMap(v *ssa.Value) bool { // The runtime only has safe-points in function prologues, so // we only need stack maps at call sites. go:nosplit functions // are similar. - if compiling_runtime || lv.f.NoSplit { + if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit { if !v.Op.IsCall() { return false } @@ -1172,13 +1194,15 @@ func (lv *Liveness) epilogue() { lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n) } } - // Check that no registers are live at function entry. - // The context register, if any, comes from a - // LoweredGetClosurePtr operation first thing in the function, - // so it doesn't appear live at entry. - if regs := lv.regMaps[0]; regs != 0 { - lv.printDebug() - lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config)) + if !go115ReduceLiveness { + // Check that no registers are live at function entry. + // The context register, if any, comes from a + // LoweredGetClosurePtr operation first thing in the function, + // so it doesn't appear live at entry. + if regs := lv.regMaps[0]; regs != 0 { + lv.printDebug() + lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config)) + } } } @@ -1199,7 +1223,7 @@ func (lv *Liveness) epilogue() { // PCDATA tables cost about 100k. So for now we keep using a single index for // both bitmap lists. func (lv *Liveness) compact(b *ssa.Block) { - add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { + add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness // Deduplicate the stack map. stackIndex := lv.stackMapSet.add(live.vars) // Deduplicate the register map. @@ -1214,11 +1238,26 @@ func (lv *Liveness) compact(b *ssa.Block) { pos := 0 if b == lv.f.Entry { // Handle entry stack map. - add(lv.livevars[0], false) + if !go115ReduceLiveness { + add(lv.livevars[0], false) + } else { + lv.stackMapSet.add(lv.livevars[0].vars) + } pos++ } for _, v := range b.Values { - if lv.hasStackMap(v) { + if go115ReduceLiveness { + hasStackMap := lv.hasStackMap(v) + isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID)) + idx := LivenessIndex{StackMapDontCare, 0, isUnsafePoint} + if hasStackMap { + idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars) + pos++ + } + if hasStackMap || isUnsafePoint { + lv.livenessMap.set(v, idx) + } + } else if lv.hasStackMap(v) { isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID)) lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint)) pos++ @@ -1407,7 +1446,7 @@ func (lv *Liveness) printDebug() { printed = true } } - if pcdata.RegMapValid() { + if pcdata.RegMapValid() { // only if !go115ReduceLiveness regLive := lv.regMaps[pcdata.regMapIndex] if regLive != 0 { if printed { @@ -1491,19 +1530,21 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) { loff = dbvec(&liveSymTmp, loff, locals) } - regs := bvalloc(lv.usedRegs()) - roff := duint32(®sSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps - roff = duint32(®sSymTmp, roff, uint32(regs.n)) // number of bits in each bitmap - if regs.n > 32 { - // Our uint32 conversion below won't work. - Fatalf("GP registers overflow uint32") - } + if !go115ReduceLiveness { + regs := bvalloc(lv.usedRegs()) + roff := duint32(®sSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps + roff = duint32(®sSymTmp, roff, uint32(regs.n)) // number of bits in each bitmap + if regs.n > 32 { + // Our uint32 conversion below won't work. + Fatalf("GP registers overflow uint32") + } - if regs.n > 0 { - for _, live := range lv.regMaps { - regs.Clear() - regs.b[0] = uint32(live) - roff = dbvec(®sSymTmp, roff, regs) + if regs.n > 0 { + for _, live := range lv.regMaps { + regs.Clear() + regs.b[0] = uint32(live) + roff = dbvec(®sSymTmp, roff, regs) + } } } @@ -1518,7 +1559,11 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) { lsym.P = tmpSym.P }) } - return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(®sSymTmp) + if !go115ReduceLiveness { + return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(®sSymTmp) + } + // TODO(go115ReduceLiveness): Remove regsSym result + return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil } // Entry pointer for liveness analysis. Solves for the liveness of @@ -1578,11 +1623,13 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap { p.To.Name = obj.NAME_EXTERN p.To.Sym = ls.Func.GCLocals - p = pp.Prog(obj.AFUNCDATA) - Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) - p.To.Type = obj.TYPE_MEM - p.To.Name = obj.NAME_EXTERN - p.To.Sym = ls.Func.GCRegs + if !go115ReduceLiveness { + p = pp.Prog(obj.AFUNCDATA) + Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ls.Func.GCRegs + } return lv.livenessMap } diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index c7bab6a2ca..65e58887e6 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -412,7 +412,7 @@ type FuncInfo struct { GCArgs *LSym GCLocals *LSym - GCRegs *LSym + GCRegs *LSym // Only if !go115ReduceLiveness StackObjects *LSym OpenCodedDeferInfo *LSym diff --git a/src/cmd/internal/objabi/funcdata.go b/src/cmd/internal/objabi/funcdata.go index 1c07f011da..2a51816cbd 100644 --- a/src/cmd/internal/objabi/funcdata.go +++ b/src/cmd/internal/objabi/funcdata.go @@ -11,13 +11,14 @@ package objabi // ../../../runtime/symtab.go. const ( - PCDATA_RegMapIndex = 0 + PCDATA_RegMapIndex = 0 // if !go115ReduceLiveness + PCDATA_UnsafePoint = 0 // if go115ReduceLiveness PCDATA_StackMapIndex = 1 PCDATA_InlTreeIndex = 2 FUNCDATA_ArgsPointerMaps = 0 FUNCDATA_LocalsPointerMaps = 1 - FUNCDATA_RegPointerMaps = 2 + FUNCDATA_RegPointerMaps = 2 // if !go115ReduceLiveness FUNCDATA_StackObjects = 3 FUNCDATA_InlTree = 4 FUNCDATA_OpenCodedDeferInfo = 5 @@ -32,5 +33,11 @@ const ( // Special PCDATA values. const ( // PCDATA_RegMapIndex values. + // + // Only if !go115ReduceLiveness. PCDATA_RegMapUnsafe = -2 // Unsafe for async preemption + + // PCDATA_UnsafePoint values. + PCDATA_UnsafePointSafe = -1 // Safe for async preemption + PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption ) diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go index 33c70b8c7f..5cbe382ce7 100644 --- a/src/runtime/debugcall.go +++ b/src/runtime/debugcall.go @@ -76,20 +76,32 @@ func debugCallCheck(pc uintptr) string { return } - // Look up PC's register map. - pcdata := int32(-1) - if pc != f.entry { - pc-- - pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil) - } - if pcdata == -1 { - pcdata = 0 // in prologue - } - stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps)) - if pcdata == -2 || stkmap == nil { - // Not at a safe point. - ret = debugCallUnsafePoint - return + if !go115ReduceLiveness { + // Look up PC's register map. + pcdata := int32(-1) + if pc != f.entry { + pc-- + pcdata = pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil) + } + if pcdata == -1 { + pcdata = 0 // in prologue + } + stkmap := (*stackmap)(funcdata(f, _FUNCDATA_RegPointerMaps)) + if pcdata == -2 || stkmap == nil { + // Not at a safe point. + ret = debugCallUnsafePoint + return + } + } else { + // Check that this isn't an unsafe-point. + if pc != f.entry { + pc-- + } + up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil) + if up != _PCDATA_UnsafePointSafe { + // Not at a safe point. + ret = debugCallUnsafePoint + } } }) return ret diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go index 420a7f96e0..41a32fa650 100644 --- a/src/runtime/preempt.go +++ b/src/runtime/preempt.go @@ -58,6 +58,9 @@ import ( "unsafe" ) +// Keep in sync with cmd/compile/internal/gc/plive.go:go115ReduceLiveness. +const go115ReduceLiveness = true + type suspendGState struct { g *g @@ -393,12 +396,22 @@ func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) bool { // use the LR for unwinding, which will be bad. return false } - smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil) - if smi == -2 { - // Unsafe-point marked by compiler. This includes - // atomic sequences (e.g., write barrier) and nosplit - // functions (except at calls). - return false + if !go115ReduceLiveness { + smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil) + if smi == -2 { + // Unsafe-point marked by compiler. This includes + // atomic sequences (e.g., write barrier) and nosplit + // functions (except at calls). + return false + } + } else { + up := pcdatavalue(f, _PCDATA_UnsafePoint, pc, nil) + if up != _PCDATA_UnsafePointSafe { + // Unsafe-point marked by compiler. This includes + // atomic sequences (e.g., write barrier) and nosplit + // functions (except at calls). + return false + } } if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || fd == unsafe.Pointer(&no_pointers_stackmap) { // This is assembly code. Don't assume it's diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index b2147c4cb4..04aa90e077 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -268,13 +268,14 @@ func (f *Func) funcInfo() funcInfo { // // See funcdata.h and ../cmd/internal/objabi/funcdata.go. const ( - _PCDATA_RegMapIndex = 0 + _PCDATA_RegMapIndex = 0 // if !go115ReduceLiveness + _PCDATA_UnsafePoint = 0 // if go115ReduceLiveness _PCDATA_StackMapIndex = 1 _PCDATA_InlTreeIndex = 2 _FUNCDATA_ArgsPointerMaps = 0 _FUNCDATA_LocalsPointerMaps = 1 - _FUNCDATA_RegPointerMaps = 2 + _FUNCDATA_RegPointerMaps = 2 // if !go115ReduceLiveness _FUNCDATA_StackObjects = 3 _FUNCDATA_InlTree = 4 _FUNCDATA_OpenCodedDeferInfo = 5 @@ -282,6 +283,12 @@ const ( _ArgsSizeUnknown = -0x80000000 ) +const ( + // PCDATA_UnsafePoint values. + _PCDATA_UnsafePointSafe = -1 // Safe for async preemption + _PCDATA_UnsafePointUnsafe = -2 // Unsafe for async preemption +) + // A FuncID identifies particular functions that need to be treated // specially by the runtime. // Note that in some situations involving plugins, there may be multiple |