diff options
author | Heschi Kreinick <heschi@google.com> | 2017-07-21 18:30:19 -0400 |
---|---|---|
committer | Heschi Kreinick <heschi@google.com> | 2017-07-27 20:19:44 +0000 |
commit | 4c54a047c6ea88dd77416a3b878f6935165f6129 (patch) | |
tree | 8b680905cd4b42a16861b80b5f9f32c203315d51 /src/cmd/compile/internal/ssa/regalloc.go | |
parent | cd702b171c90be4b410d19bd93d5ea2899eaa809 (diff) | |
download | go-dev.debug.tar.gz go-dev.debug.zip |
[dev.debug] cmd/compile: better DWARF with optimizations on
Debuggers use DWARF information to find local variables on the
stack and in registers. Prior to this CL, the DWARF information for
functions claimed that all variables were on the stack at all times.
That's incorrect when optimizations are enabled, and results in
debuggers showing data that is out of date or complete gibberish.
After this CL, the compiler is capable of representing variable
locations more accurately, and attempts to do so. Due to limitations of
the SSA backend, it's not possible to be completely correct.
There are a number of problems in the current design. One of the easier
ones to understand is that variable names currently must be attached to an
SSA value, but not all assignments in the source code actually result
in machine code. For example:
type myint int
var a int
b := myint(a)
and
b := (*uint64)(unsafe.Pointer(a))
don't generate machine code because the underlying representation is the
same, so the correct value of b will not be set when the user would
expect.
Generating the more precise debug information is behind a flag,
dwarflocationlists. Because of the issues described above, setting the
flag may not make the debugging experience much better, and may actually
make it worse in cases where the variable actually is on the stack and
the more complicated analysis doesn't realize it.
A number of changes are included:
- Add a new pseudo-instruction, RegKill, which indicates that the value
in the register has been clobbered.
- Adjust regalloc to emit RegKills in the right places. Significantly,
this means that phis are mixed with StoreReg and RegKills after
regalloc.
- Track variable decomposition in ssa.LocalSlots.
- After the SSA backend is done, analyze the result and build location
lists for each LocalSlot.
- After assembly is done, update the location lists with the assembled
PC offsets, recompose variables, and build DWARF location lists. Emit the
list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section.
TODO:
- currently disabled for non-X86/AMD64 because there are no data tables.
go build -toolexec 'toolstash -cmp' -a std succeeds.
With -dwarflocationlists false:
before: f02812195637909ff675782c0b46836a8ff01976
after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec
benchstat -geomean /tmp/220352263 /tmp/621364410
completed 15 of 15, estimated time remaining 0s (eta 3:52PM)
name old time/op new time/op delta
Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14)
Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15)
GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14)
Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15)
GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15)
Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13)
Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15)
XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15)
[Geo mean] 206ms 377ms +82.86%
name old user-time/op new user-time/op delta
Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15)
Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14)
GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15)
Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15)
GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15)
Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13)
Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15)
XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15)
[Geo mean] 317ms 583ms +83.72%
name old alloc/op new alloc/op delta
Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15)
Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15)
GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14)
Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15)
GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15)
Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15)
Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15)
XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15)
[Geo mean] 42.1MB 75.0MB +78.05%
name old allocs/op new allocs/op delta
Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15)
Unicode 344k ± 1% 344k ± 0% ~ (p=0.715 n=15+14)
GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14)
Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15)
GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15)
Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15)
Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15)
XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15)
[Geo mean] 439k 755k +72.01%
name old text-bytes new text-bytes delta
HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15)
name old data-bytes new data-bytes delta
HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal)
name old bss-bytes new bss-bytes delta
HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal)
name old exe-bytes new exe-bytes delta
HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal)
Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8
Reviewed-on: https://go-review.googlesource.com/41770
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/regalloc.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/regalloc.go | 94 |
1 files changed, 73 insertions, 21 deletions
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index e297e6bce7..0abaeaeeb5 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -242,6 +242,9 @@ type regAllocState struct { // current state of each (preregalloc) Value values []valState + // names associated with each Value + valueNames [][]LocalSlot + // ID of SP, SB values sp, sb ID @@ -300,6 +303,13 @@ type startReg struct { // freeReg frees up register r. Any current user of r is kicked out. func (s *regAllocState) freeReg(r register) { + s.freeOrResetReg(r, false) +} + +// freeOrResetReg frees up register r. Any current user of r is kicked out. +// resetting indicates that the operation is only for bookkeeping, +// e.g. when clearing out state upon entry to a new block. +func (s *regAllocState) freeOrResetReg(r register, resetting bool) { v := s.regs[r].v if v == nil { s.f.Fatalf("tried to free an already free register %d\n", r) @@ -309,6 +319,16 @@ func (s *regAllocState) freeReg(r register) { if s.f.pass.debug > regDebug { fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c) } + if !resetting && s.f.Config.ctxt.Flag_locationlists && len(s.valueNames[v.ID]) != 0 { + kill := s.curBlock.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid) + for int(kill.ID) >= len(s.orig) { + s.orig = append(s.orig, nil) + } + for _, name := range s.valueNames[v.ID] { + s.f.NamedValues[name] = append(s.f.NamedValues[name], kill) + } + s.f.setHome(kill, &s.registers[r]) + } s.regs[r] = regState{} s.values[v.ID].regs &^= regMask(1) << r s.used &^= regMask(1) << r @@ -599,6 +619,17 @@ func (s *regAllocState) init(f *Func) { s.values = make([]valState, f.NumValues()) s.orig = make([]*Value, f.NumValues()) s.copies = make(map[*Value]bool) + if s.f.Config.ctxt.Flag_locationlists { + s.valueNames = make([][]LocalSlot, f.NumValues()) + for slot, values := range f.NamedValues { + if isSynthetic(&slot) { + continue + 
} + for _, value := range values { + s.valueNames[value.ID] = append(s.valueNames[value.ID], slot) + } + } + } for _, b := range f.Blocks { for _, v := range b.Values { if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() { @@ -692,7 +723,9 @@ func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool { // Sets the state of the registers to that encoded in regs. func (s *regAllocState) setState(regs []endReg) { - s.freeRegs(s.used) + for s.used != 0 { + s.freeOrResetReg(pickReg(s.used), true) + } for _, x := range regs { s.assignReg(x.r, x.v, x.c) } @@ -735,6 +768,9 @@ func (s *regAllocState) regalloc(f *Func) { } for _, b := range f.Blocks { + if s.f.pass.debug > regDebug { + fmt.Printf("Begin processing block %v\n", b) + } s.curBlock = b // Initialize regValLiveSet and uses fields for this block. @@ -830,9 +866,6 @@ func (s *regAllocState) regalloc(f *Func) { // This is the complicated case. We have more than one predecessor, // which means we may have Phi ops. - // Copy phi ops into new schedule. - b.Values = append(b.Values, phis...) - // Start with the final register state of the primary predecessor idx := s.primary[b.ID] if idx < 0 { @@ -910,6 +943,9 @@ func (s *regAllocState) regalloc(f *Func) { } } + // Copy phi ops into new schedule. + b.Values = append(b.Values, phis...) + // Third pass - pick registers for phis whose inputs // were not in a register. 
for i, v := range phis { @@ -1005,7 +1041,7 @@ func (s *regAllocState) regalloc(f *Func) { pidx := e.i for _, v := range succ.Values { if v.Op != OpPhi { - break + continue } if !s.values[v.ID].needReg { continue @@ -1565,6 +1601,9 @@ func (s *regAllocState) placeSpills() { for _, b := range f.Blocks { var m regMask for _, v := range b.Values { + if v.Op == OpRegKill { + continue + } if v.Op != OpPhi { break } @@ -1675,7 +1714,7 @@ func (s *regAllocState) placeSpills() { for _, b := range f.Blocks { nphi := 0 for _, v := range b.Values { - if v.Op != OpPhi { + if v.Op != OpRegKill && v.Op != OpPhi { break } nphi++ @@ -1800,6 +1839,9 @@ func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive } // Phis need their args to end up in a specific location. for _, v := range e.b.Values { + if v.Op == OpRegKill { + continue + } if v.Op != OpPhi { break } @@ -1878,6 +1920,7 @@ func (e *edgeState) process() { if e.s.f.pass.debug > regDebug { fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c) } + e.erase(r) if _, isReg := loc.(*Register); isReg { c = e.p.NewValue1(d.pos, OpCopy, c.Type, c) } else { @@ -1943,6 +1986,18 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP } } _, dstReg := loc.(*Register) + + // Pre-clobber destination. This avoids the + // following situation: + // - v is currently held in R0 and stacktmp0. + // - We want to copy stacktmp1 to stacktmp0. + // - We choose R0 as the temporary register. + // During the copy, both R0 and stacktmp0 are + // clobbered, losing both copies of v. Oops! + // Erasing the destination early means R0 will not + // be chosen as the temp register, as it will then + // be the last copy of v. + e.erase(loc) var x *Value if c == nil { if !e.s.values[vid].rematerializeable { @@ -1953,8 +2008,8 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP } else { // Rematerialize into stack slot. 
Need a free // register to accomplish this. - e.erase(loc) // see pre-clobber comment below r := e.findRegFor(v.Type) + e.erase(r) x = v.copyIntoNoXPos(e.p) e.set(r, vid, x, false, pos) // Make sure we spill with the size of the slot, not the @@ -1976,20 +2031,8 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP x = e.p.NewValue1(pos, OpLoadReg, c.Type, c) } else { // mem->mem. Use temp register. - - // Pre-clobber destination. This avoids the - // following situation: - // - v is currently held in R0 and stacktmp0. - // - We want to copy stacktmp1 to stacktmp0. - // - We choose R0 as the temporary register. - // During the copy, both R0 and stacktmp0 are - // clobbered, losing both copies of v. Oops! - // Erasing the destination early means R0 will not - // be chosen as the temp register, as it will then - // be the last copy of v. - e.erase(loc) - r := e.findRegFor(c.Type) + e.erase(r) t := e.p.NewValue1(pos, OpLoadReg, c.Type, c) e.set(r, vid, t, false, pos) x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t) @@ -2008,7 +2051,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP // set changes the contents of location loc to hold the given value and its cached representative. 
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) { e.s.f.setHome(c, loc) - e.erase(loc) e.contents[loc] = contentRecord{vid, c, final, pos} a := e.cache[vid] if len(a) == 0 { @@ -2059,6 +2101,16 @@ func (e *edgeState) erase(loc Location) { fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c) } a[i], a = a[len(a)-1], a[:len(a)-1] + if e.s.f.Config.ctxt.Flag_locationlists { + if _, isReg := loc.(*Register); isReg && int(c.ID) < len(e.s.valueNames) && len(e.s.valueNames[c.ID]) != 0 { + kill := e.p.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid) + e.s.f.setHome(kill, loc) + for _, name := range e.s.valueNames[c.ID] { + e.s.f.NamedValues[name] = append(e.s.f.NamedValues[name], kill) + } + } + } + break } } |