aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/gc/pgen.go
diff options
context:
space:
mode:
authorHeschi Kreinick <heschi@google.com>2017-07-21 18:30:19 -0400
committerHeschi Kreinick <heschi@google.com>2017-07-27 20:19:44 +0000
commit4c54a047c6ea88dd77416a3b878f6935165f6129 (patch)
tree8b680905cd4b42a16861b80b5f9f32c203315d51 /src/cmd/compile/internal/gc/pgen.go
parentcd702b171c90be4b410d19bd93d5ea2899eaa809 (diff)
downloadgo-dev.debug.tar.gz
go-dev.debug.zip
[dev.debug] cmd/compile: better DWARF with optimizations on (branch: dev.debug)
Debuggers use DWARF information to find local variables on the stack and in registers. Prior to this CL, the DWARF information for functions claimed that all variables were on the stack at all times. That's incorrect when optimizations are enabled, and results in debuggers showing data that is out of date or complete gibberish. After this CL, the compiler is capable of representing variable locations more accurately, and attempts to do so. Due to limitations of the SSA backend, it's not possible to be completely correct. There are a number of problems in the current design. One of the easier ones to understand is that variable names currently must be attached to an SSA value, but not all assignments in the source code actually result in machine code. For example, given `type myint int` and `var a int`, the assignments `b := myint(a)` and `b := (*uint64)(unsafe.Pointer(&a))` don't generate machine code because the underlying representation is the same, so the correct value of b will not be set when the user would expect. Generating the more precise debug information is behind a flag, dwarflocationlists. Because of the issues described above, setting the flag may not make the debugging experience much better, and may actually make it worse in cases where the variable actually is on the stack and the more complicated analysis doesn't realize it. A number of changes are included: - Add a new pseudo-instruction, RegKill, which indicates that the value in the register has been clobbered. - Adjust regalloc to emit RegKills in the right places. Significantly, this means that phis are mixed with StoreReg and RegKills after regalloc. - Track variable decomposition in ssa.LocalSlots. - After the SSA backend is done, analyze the result and build location lists for each LocalSlot. - After assembly is done, update the location lists with the assembled PC offsets, recompose variables, and build DWARF location lists. Emit the list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section. TODO: - currently disabled for non-X86/AMD64 because there are no data tables. go build -toolexec 'toolstash -cmp' -a std succeeds. With -dwarflocationlists false: before: f02812195637909ff675782c0b46836a8ff01976 after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec benchstat -geomean /tmp/220352263 /tmp/621364410 completed 15 of 15, estimated time remaining 0s (eta 3:52PM) name old time/op new time/op delta Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14) Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15) GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14) Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15) GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15) Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13) Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15) XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15) [Geo mean] 206ms 377ms +82.86% name old user-time/op new user-time/op delta Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15) Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14) GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15) Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15) GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15) Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13) Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15) XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15) [Geo mean] 317ms 583ms +83.72% name old alloc/op new alloc/op delta Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15) Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15) GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14) Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15) GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15) Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15) Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15) XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15) [Geo mean] 42.1MB 75.0MB +78.05% name old allocs/op new allocs/op delta Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15) Unicode 344k 
± 1% 344k ± 0% ~ (p=0.715 n=15+14) GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14) Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15) GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15) Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15) Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15) XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15) [Geo mean] 439k 755k +72.01% name old text-bytes new text-bytes delta HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15) name old data-bytes new data-bytes delta HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal) Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8 Reviewed-on: https://go-review.googlesource.com/41770 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Diffstat (limited to 'src/cmd/compile/internal/gc/pgen.go')
-rw-r--r--src/cmd/compile/internal/gc/pgen.go353
1 files changed, 321 insertions, 32 deletions
diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go
index d301ae19c8..542fd43b63 100644
--- a/src/cmd/compile/internal/gc/pgen.go
+++ b/src/cmd/compile/internal/gc/pgen.go
@@ -13,6 +13,7 @@ import (
"cmd/internal/src"
"cmd/internal/sys"
"fmt"
+ "math"
"math/rand"
"sort"
"sync"
@@ -303,47 +304,31 @@ func compileFunctions() {
+// debuginfo records fn's used autos and parameters in fnsym.Func.Autom and
+// returns the result of assembleScopes for the function's DWARF variables.
+// curfn is asserted to be a *Node, and fnsym must be that function's linker
+// symbol; a mismatch is a fatal error.
func debuginfo(fnsym *obj.LSym, curfn interface{}) []dwarf.Scope {
fn := curfn.(*Node)
+ // Take the SSA debug info off the function and clear the field; below it is
+ // only handed to createComplexVars.
+ debugInfo := fn.Func.DebugInfo
+ fn.Func.DebugInfo = nil
if expect := fn.Func.Nname.Sym.Linksym(); fnsym != expect {
Fatalf("unexpected fnsym: %v != %v", fnsym, expect)
}
- var dwarfVars []*dwarf.Var
- var varScopes []ScopeID
-
+ // automDecls collects every PAUTO/PPARAM/PPARAMOUT name that gets an Autom
+ // entry, for use by createSimpleVars.
+ var automDecls []*Node
+ // Populate Automs for fn.
for _, n := range fn.Func.Dcl {
if n.Op != ONAME { // might be OTYPE or OLITERAL
continue
}
-
var name obj.AddrName
- var abbrev int
- offs := n.Xoffset
-
switch n.Class() {
case PAUTO:
if !n.Name.Used() {
Fatalf("debuginfo unused node (AllocFrame should truncate fn.Func.Dcl)")
}
name = obj.NAME_AUTO
-
- abbrev = dwarf.DW_ABRV_AUTO
- if Ctxt.FixedFrameSize() == 0 {
- offs -= int64(Widthptr)
- }
- if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
- offs -= int64(Widthptr)
- }
-
case PPARAM, PPARAMOUT:
name = obj.NAME_PARAM
-
- abbrev = dwarf.DW_ABRV_PARAM
- offs += Ctxt.FixedFrameSize()
-
default:
continue
}
-
+ automDecls = append(automDecls, n)
gotype := ngotype(n).Linksym()
fnsym.Func.Autom = append(fnsym.Func.Autom, &obj.Auto{
Asym: Ctxt.Lookup(n.Sym.Name),
@@ -351,32 +336,336 @@ func debuginfo(fnsym *obj.LSym, curfn interface{}) []dwarf.Scope {
Name: name,
Gotype: gotype,
})
+ }
+
+ // With both location lists and optimization enabled, derive the variables
+ // from the SSA debug info; otherwise describe every collected declaration
+ // as living on the stack.
+ var dwarfVars []*dwarf.Var
+ var decls []*Node
+ if Ctxt.Flag_locationlists && Ctxt.Flag_optimize {
+ decls, dwarfVars = createComplexVars(fn, debugInfo)
+ } else {
+ decls, dwarfVars = createSimpleVars(automDecls)
+ }
+
+ // Compute one scope per returned declaration. Captured and by-value
+ // variables keep the zero ScopeID.
+ var varScopes []ScopeID
+ for _, decl := range decls {
+ var scope ScopeID
+ if !decl.Name.Captured() && !decl.Name.Byval() {
+ // n.Pos of captured variables is their first
+ // use in the closure but they should always
+ // be assigned to scope 0 instead.
+ // TODO(mdempsky): Verify this.
+ scope = findScope(fn.Func.Marks, decl.Pos)
+ }
+ varScopes = append(varScopes, scope)
+ }
+ return assembleScopes(fnsym, fn, dwarfVars, varScopes)
+}
+// createSimpleVars creates a DWARF entry for every variable declared in the
+// function, claiming that they are permanently on the stack.
+//
+// automDecls must contain only PAUTO, PPARAM and PPARAMOUT nodes; any other
+// class is a fatal error. The returned slices are index-aligned: the i'th
+// dwarf.Var describes the i'th returned node.
+func createSimpleVars(automDecls []*Node) ([]*Node, []*dwarf.Var) {
+ var vars []*dwarf.Var
+ var decls []*Node
+ for _, n := range automDecls {
+ // Nodes reporting IsAutoTmp get no DWARF entry.
if n.IsAutoTmp() {
continue
}
+ var abbrev int
+ offs := n.Xoffset
+
+ // Pick the DWARF abbreviation and adjust the node's frame offset
+ // according to its class.
+ switch n.Class() {
+ case PAUTO:
+ abbrev = dwarf.DW_ABRV_AUTO
+ if Ctxt.FixedFrameSize() == 0 {
+ offs -= int64(Widthptr)
+ }
+ if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+ offs -= int64(Widthptr)
+ }
+
+ case PPARAM, PPARAMOUT:
+ abbrev = dwarf.DW_ABRV_PARAM
+ offs += Ctxt.FixedFrameSize()
+ default:
+ Fatalf("createSimpleVars unexpected type %v for node %v", n.Class(), n)
+ }
- typename := dwarf.InfoPrefix + gotype.Name[len("type."):]
- dwarfVars = append(dwarfVars, &dwarf.Var{
+ typename := dwarf.InfoPrefix + typesymname(n.Type)
+ decls = append(decls, n)
+ vars = append(vars, &dwarf.Var{
Name: n.Sym.Name,
Abbrev: abbrev,
StackOffset: int32(offs),
Type: Ctxt.Lookup(typename),
})
+ }
+ return decls, vars
+}
- var scope ScopeID
- if !n.Name.Captured() && !n.Name.Byval() {
- // n.Pos of captured variables is their first
- // use in the closure but they should always
- // be assigned to scope 0 instead.
- // TODO(mdempsky): Verify this.
- scope = findScope(fn.Func.Marks, n.Pos)
+// varPart is one SSA slot's contribution to a user variable, as grouped by
+// createComplexVars.
+type varPart struct {
+ // varOffset is the part's offset within the user variable (see the
+ // varOffset function); parts are sorted by it.
+ varOffset int64
+ // slot is the part's index into the FuncDebug Slots and Variables.
+ slot ssa.SlotID
+ // locs is the location list recorded for slot.
+ locs ssa.VarLocList
+}
+
+// createComplexVars creates a DWARF entry with a location list for each user
+// variable that has at least one non-empty SSA location list in debugInfo.
+// The returned slices are index-aligned (the i'th dwarf.Var describes the
+// i'th node), and each user variable appears at most once.
+func createComplexVars(fn *Node, debugInfo *ssa.FuncDebug) ([]*Node, []*dwarf.Var) {
+ // Resolve the instruction boundaries of every location to PCs.
+ // NOTE(review): this writes through loc, so it relies on Locations holding
+ // pointers -- confirm against ssa.VarLocList.
+ for _, locList := range debugInfo.Variables {
+ for _, loc := range locList.Locations {
+ if loc.StartProg != nil {
+ loc.StartPC = loc.StartProg.Pc
+ }
+ if loc.EndProg != nil {
+ loc.EndPC = loc.EndProg.Pc
+ }
+ // The Progs are only kept when location list debugging is enabled.
+ if Debug_locationlist == 0 {
+ loc.EndProg = nil
+ loc.StartProg = nil
+ }
}
+ }
- varScopes = append(varScopes, scope)
+ // Group SSA variables by the user variable they were decomposed from.
+ varParts := map[*Node][]varPart{}
+ for slotID, locList := range debugInfo.Variables {
+ if len(locList.Locations) == 0 {
+ continue
+ }
+ slot := debugInfo.Slots[slotID]
+ // Compute the offset from the part itself, before walking up to the
+ // root: the root has no SplitOf, so its varOffset says nothing about
+ // where this part lives inside the variable.
+ offset := varOffset(slot)
+ for slot.SplitOf != nil {
+ slot = slot.SplitOf
+ }
+ n := slot.N.(*Node)
+ varParts[n] = append(varParts[n], varPart{offset, ssa.SlotID(slotID), locList})
}
- return assembleScopes(fnsym, fn, dwarfVars, varScopes)
+ // Produce a DWARF variable entry for each user variable.
+ // Don't iterate over the map -- that's nondeterministic, and
+ // createComplexVar has side effects. Instead, go by slot.
+ var decls []*Node
+ var vars []*dwarf.Var
+ for _, slot := range debugInfo.Slots {
+ for slot.SplitOf != nil {
+ slot = slot.SplitOf
+ }
+ n := slot.N.(*Node)
+ parts := varParts[n]
+ if parts == nil {
+ continue
+ }
+ // Several slots can lead back to the same user variable; remove it
+ // from the map so it is only emitted once.
+ delete(varParts, n)
+
+ // Get the order the parts need to be in to represent the memory
+ // of the decomposed user variable.
+ sort.Sort(partsByVarOffset(parts))
+
+ if dvar := createComplexVar(debugInfo, n, parts); dvar != nil {
+ decls = append(decls, n)
+ vars = append(vars, dvar)
+ }
+ }
+ return decls, vars
+}
+
+// varOffset reports where slot sits inside the user variable it was split
+// from: its own Off plus the SplitOffset of slot and of each ancestor that
+// itself has a SplitOf. It is unrelated to the slot's stack offset.
+func varOffset(slot *ssa.LocalSlot) int64 {
+ total := slot.Off
+ cur := slot
+ for cur.SplitOf != nil {
+ total += cur.SplitOffset
+ cur = cur.SplitOf
+ }
+ return total
+}
+
+// partsByVarOffset implements sort.Interface, ordering parts by increasing
+// varOffset.
+type partsByVarOffset []varPart
+
+func (p partsByVarOffset) Len() int {
+ return len(p)
+}
+
+func (p partsByVarOffset) Less(i, j int) bool {
+ return p[j].varOffset > p[i].varOffset
+}
+
+func (p partsByVarOffset) Swap(i, j int) {
+ p[j], p[i] = p[i], p[j]
+}
+
+// createComplexVar builds a DWARF variable entry and location list representing n.
+//
+// parts are the SSA slots n was decomposed into, ordered by their offset
+// within n; it must not be empty. The result is nil when n is not a PAUTO,
+// PPARAM or PPARAMOUT. Each location list entry carries one piece per part,
+// in parts order; a part with no location in the entry's PC range is marked
+// Missing, and ranges in which every part is missing get no entry.
+func createComplexVar(debugInfo *ssa.FuncDebug, n *Node, parts []varPart) *dwarf.Var {
+ slots := debugInfo.Slots
+ var offs int64 // base stack offset for this kind of variable
+ var abbrev int
+ switch n.Class() {
+ case PAUTO:
+ abbrev = dwarf.DW_ABRV_AUTO_LOCLIST
+ if Ctxt.FixedFrameSize() == 0 {
+ offs -= int64(Widthptr)
+ }
+ if objabi.Framepointer_enabled(objabi.GOOS, objabi.GOARCH) {
+ offs -= int64(Widthptr)
+ }
+
+ case PPARAM, PPARAMOUT:
+ abbrev = dwarf.DW_ABRV_PARAM_LOCLIST
+ offs += Ctxt.FixedFrameSize()
+ default:
+ return nil
+ }
+
+ gotype := ngotype(n).Linksym()
+ typename := dwarf.InfoPrefix + gotype.Name[len("type."):]
+ // The stack offset is used as a sorting key, so for decomposed
+ // variables just give it the lowest one. It's not used otherwise.
+ stackOffset := slots[parts[0].slot].N.(*Node).Xoffset + offs
+ dvar := &dwarf.Var{
+ Name: n.Sym.Name,
+ Abbrev: abbrev,
+ Type: Ctxt.Lookup(typename),
+ StackOffset: int32(stackOffset),
+ }
+
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("Building location list for %+v. Parts:\n", n)
+ for _, part := range parts {
+ Ctxt.Logf("\t%v => %v\n", debugInfo.Slots[part.slot], part.locs)
+ }
+ }
+
+ // Given a variable that's been decomposed into multiple parts,
+ // its location list may need a new entry after the beginning or
+ // end of every location entry for each of its parts. For example:
+ //
+ // [variable] [pc range]
+ // string.ptr |----|-----| |----|
+ // string.len |------------| |--|
+ // ... needs a location list like:
+ // string |----|-----|-| |--|-|
+ //
+ // Note that location entries may or may not line up with each other,
+ // and some of the result will only have one or the other part.
+ //
+ // To build the resulting list:
+ // - keep a "current" pointer for each part
+ // - find the next transition point
+ // - advance the current pointer for each part up to that transition point
+ // - build the piece for the range between that transition point and the next
+ // - repeat
+
+ // curLoc holds, per slot ID, the index of that slot's current location.
+ // It is sized by len(slots) and must always be indexed by part.slot,
+ // never by a position within parts.
+ curLoc := make([]int, len(slots))
+
+ // findBoundaryAfter finds the next beginning or end of a piece after currentPC.
+ // It returns math.MaxInt64 when no boundary is left.
+ findBoundaryAfter := func(currentPC int64) int64 {
+ min := int64(math.MaxInt64)
+ for _, part := range parts {
+ // For each part, find the first PC greater than current. Doesn't
+ // matter if it's a start or an end, since we're looking for any boundary.
+ // If it's the new winner, save it.
+ onePart:
+ for i := curLoc[part.slot]; i < len(part.locs.Locations); i++ {
+ for _, pc := range [2]int64{part.locs.Locations[i].StartPC, part.locs.Locations[i].EndPC} {
+ if pc > currentPC {
+ if pc < min {
+ min = pc
+ }
+ break onePart
+ }
+ }
+ }
+ }
+ return min
+ }
+ // NOTE(review): the search starts strictly after PC 0, so a boundary at
+ // exactly PC 0 can never begin a chunk -- confirm that no location starts
+ // at the function's first instruction.
+ var start int64
+ end := findBoundaryAfter(0)
+ for {
+ // Advance to the next chunk.
+ start = end
+ end = findBoundaryAfter(start)
+ if end == math.MaxInt64 {
+ break
+ }
+
+ dloc := dwarf.Location{StartPC: start, EndPC: end}
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("Processing range %x -> %x\n", start, end)
+ }
+
+ // Advance curLoc to the last location that starts before/at start.
+ // After this loop, if there's a location that covers [start, end), it will be current.
+ // Otherwise the current piece will be too early.
+ for _, part := range parts {
+ choice := -1
+ for i := curLoc[part.slot]; i < len(part.locs.Locations); i++ {
+ if part.locs.Locations[i].StartPC > start {
+ break //overshot
+ }
+ choice = i // best yet
+ }
+ if choice != -1 {
+ curLoc[part.slot] = choice
+ }
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("\t %v => %v", slots[part.slot], curLoc[part.slot])
+ }
+ }
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("\n")
+ }
+ // Assemble the location list entry for this chunk.
+ present := 0
+ for _, part := range parts {
+ dpiece := dwarf.Piece{
+ Length: slots[part.slot].Type.Size(),
+ }
+ locIdx := curLoc[part.slot]
+ // The part is missing from this chunk when its current location
+ // does not overlap [start, end).
+ if locIdx >= len(part.locs.Locations) ||
+ start >= part.locs.Locations[locIdx].EndPC ||
+ end <= part.locs.Locations[locIdx].StartPC {
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("\t%v: missing", slots[part.slot])
+ }
+ dpiece.Missing = true
+ dloc.Pieces = append(dloc.Pieces, dpiece)
+ continue
+ }
+ present++
+ loc := part.locs.Locations[locIdx]
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("\t%v: %v", slots[part.slot], loc)
+ }
+ if loc.OnStack {
+ dpiece.OnStack = true
+ dpiece.StackOffset = int32(offs + slots[part.slot].Off + slots[part.slot].N.(*Node).Xoffset)
+ } else {
+ // If several register bits are set, the highest-numbered one wins.
+ for reg := 0; reg < len(debugInfo.Registers); reg++ {
+ if loc.Registers&(1<<uint8(reg)) != 0 {
+ dpiece.RegNum = Ctxt.Arch.DWARFRegisters[debugInfo.Registers[reg].ObjNum()]
+ }
+ }
+ }
+ dloc.Pieces = append(dloc.Pieces, dpiece)
+ }
+ if present == 0 {
+ if Debug_locationlist != 0 {
+ Ctxt.Logf(" -> totally missing\n")
+ }
+ continue
+ }
+ // Extend the previous entry if possible.
+ if len(dvar.LocationList) > 0 {
+ prev := &dvar.LocationList[len(dvar.LocationList)-1]
+ if prev.EndPC == dloc.StartPC && len(prev.Pieces) == len(dloc.Pieces) {
+ equal := true
+ for i := range prev.Pieces {
+ if prev.Pieces[i] != dloc.Pieces[i] {
+ equal = false
+ break
+ }
+ }
+ if equal {
+ prev.EndPC = end
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("-> merged with previous, now %#v\n", prev)
+ }
+ continue
+ }
+ }
+ }
+ dvar.LocationList = append(dvar.LocationList, dloc)
+ if Debug_locationlist != 0 {
+ Ctxt.Logf("-> added: %#v\n", dloc)
+ }
+ }
+ return dvar
}
// fieldtrack adds R_USEFIELD relocations to fnsym to record any