aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/regalloc.go
diff options
context:
space:
mode:
author Heschi Kreinick <heschi@google.com> 2017-07-21 18:30:19 -0400
committer Heschi Kreinick <heschi@google.com> 2017-07-27 20:19:44 +0000
commit 4c54a047c6ea88dd77416a3b878f6935165f6129 (patch)
tree 8b680905cd4b42a16861b80b5f9f32c203315d51 /src/cmd/compile/internal/ssa/regalloc.go
parent cd702b171c90be4b410d19bd93d5ea2899eaa809 (diff)
download go-dev.debug.tar.gz
go-dev.debug.zip
[dev.debug] cmd/compile: better DWARF with optimizations on (dev.debug)
Debuggers use DWARF information to find local variables on the stack and in registers. Prior to this CL, the DWARF information for functions claimed that all variables were on the stack at all times. That's incorrect when optimizations are enabled, and results in debuggers showing data that is out of date or complete gibberish. After this CL, the compiler is capable of representing variable locations more accurately, and attempts to do so. Due to limitations of the SSA backend, it's not possible to be completely correct. There are a number of problems in the current design. One of the easier ones to understand is that variable names currently must be attached to an SSA value, but not all assignments in the source code actually result in machine code. For example: type myint int; var a int; b := myint(a) and b := (*uint64)(unsafe.Pointer(a)) don't generate machine code because the underlying representation is the same, so the correct value of b will not be set when the user would expect. Generating the more precise debug information is behind a flag, dwarflocationlists. Because of the issues described above, setting the flag may not make the debugging experience much better, and may actually make it worse in cases where the variable actually is on the stack and the more complicated analysis doesn't realize it. A number of changes are included: - Add a new pseudo-instruction, RegKill, which indicates that the value in the register has been clobbered. - Adjust regalloc to emit RegKills in the right places. Significantly, this means that phis are mixed with StoreReg and RegKills after regalloc. - Track variable decomposition in ssa.LocalSlots. - After the SSA backend is done, analyze the result and build location lists for each LocalSlot. - After assembly is done, update the location lists with the assembled PC offsets, recompose variables, and build DWARF location lists. Emit the list as a new linker symbol, one per function.
- In the linker, aggregate the location lists into a .debug_loc section. TODO: - currently disabled for non-X86/AMD64 because there are no data tables. go build -toolexec 'toolstash -cmp' -a std succeeds. With -dwarflocationlists false: before: f02812195637909ff675782c0b46836a8ff01976 after: 06f61e8112a42ac34fb80e0c818b3cdb84a5e7ec benchstat -geomean /tmp/220352263 /tmp/621364410 completed 15 of 15, estimated time remaining 0s (eta 3:52PM) name old time/op new time/op delta Template 199ms ± 3% 198ms ± 2% ~ (p=0.400 n=15+14) Unicode 96.6ms ± 5% 96.4ms ± 5% ~ (p=0.838 n=15+15) GoTypes 653ms ± 2% 647ms ± 2% ~ (p=0.102 n=15+14) Flate 133ms ± 6% 129ms ± 3% -2.62% (p=0.041 n=15+15) GoParser 164ms ± 5% 159ms ± 3% -3.05% (p=0.000 n=15+15) Reflect 428ms ± 4% 422ms ± 3% ~ (p=0.156 n=15+13) Tar 123ms ±10% 124ms ± 8% ~ (p=0.461 n=15+15) XML 228ms ± 3% 224ms ± 3% -1.57% (p=0.045 n=15+15) [Geo mean] 206ms 377ms +82.86% name old user-time/op new user-time/op delta Template 292ms ±10% 301ms ±12% ~ (p=0.189 n=15+15) Unicode 166ms ±37% 158ms ±14% ~ (p=0.418 n=15+14) GoTypes 962ms ± 6% 963ms ± 7% ~ (p=0.976 n=15+15) Flate 207ms ±19% 200ms ±14% ~ (p=0.345 n=14+15) GoParser 246ms ±22% 240ms ±15% ~ (p=0.587 n=15+15) Reflect 611ms ±13% 587ms ±14% ~ (p=0.085 n=15+13) Tar 211ms ±12% 217ms ±14% ~ (p=0.355 n=14+15) XML 335ms ±15% 320ms ±18% ~ (p=0.169 n=15+15) [Geo mean] 317ms 583ms +83.72% name old alloc/op new alloc/op delta Template 40.2MB ± 0% 40.2MB ± 0% -0.15% (p=0.000 n=14+15) Unicode 29.2MB ± 0% 29.3MB ± 0% ~ (p=0.624 n=15+15) GoTypes 114MB ± 0% 114MB ± 0% -0.15% (p=0.000 n=15+14) Flate 25.7MB ± 0% 25.6MB ± 0% -0.18% (p=0.000 n=13+15) GoParser 32.2MB ± 0% 32.2MB ± 0% -0.14% (p=0.003 n=15+15) Reflect 77.8MB ± 0% 77.9MB ± 0% ~ (p=0.061 n=15+15) Tar 27.1MB ± 0% 27.0MB ± 0% -0.11% (p=0.029 n=15+15) XML 42.7MB ± 0% 42.5MB ± 0% -0.29% (p=0.000 n=15+15) [Geo mean] 42.1MB 75.0MB +78.05% name old allocs/op new allocs/op delta Template 402k ± 1% 398k ± 0% -0.91% (p=0.000 n=15+15) Unicode 344k 
± 1% 344k ± 0% ~ (p=0.715 n=15+14) GoTypes 1.18M ± 0% 1.17M ± 0% -0.91% (p=0.000 n=15+14) Flate 243k ± 0% 240k ± 1% -1.05% (p=0.000 n=13+15) GoParser 327k ± 1% 324k ± 1% -0.96% (p=0.000 n=15+15) Reflect 984k ± 1% 982k ± 0% ~ (p=0.050 n=15+15) Tar 261k ± 1% 259k ± 1% -0.77% (p=0.000 n=15+15) XML 411k ± 0% 404k ± 1% -1.55% (p=0.000 n=15+15) [Geo mean] 439k 755k +72.01% name old text-bytes new text-bytes delta HelloSize 694kB ± 0% 694kB ± 0% -0.00% (p=0.000 n=15+15) name old data-bytes new data-bytes delta HelloSize 5.55kB ± 0% 5.55kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 133kB ± 0% 133kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.04MB ± 0% 1.04MB ± 0% ~ (all equal) Change-Id: I991fc553ef175db46bb23b2128317bbd48de70d8 Reviewed-on: https://go-review.googlesource.com/41770 Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/regalloc.go')
-rw-r--r-- src/cmd/compile/internal/ssa/regalloc.go 94
1 files changed, 73 insertions, 21 deletions
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index e297e6bce7..0abaeaeeb5 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -242,6 +242,9 @@ type regAllocState struct {
// current state of each (preregalloc) Value
values []valState
+ // names associated with each Value
+ valueNames [][]LocalSlot
+
// ID of SP, SB values
sp, sb ID
@@ -300,6 +303,13 @@ type startReg struct {
// freeReg frees up register r. Any current user of r is kicked out.
func (s *regAllocState) freeReg(r register) {
+ s.freeOrResetReg(r, false)
+}
+
+// freeOrResetReg frees up register r. Any current user of r is kicked out.
+// resetting indicates that the operation is only for bookkeeping,
+// e.g. when clearing out state upon entry to a new block.
+func (s *regAllocState) freeOrResetReg(r register, resetting bool) {
v := s.regs[r].v
if v == nil {
s.f.Fatalf("tried to free an already free register %d\n", r)
@@ -309,6 +319,16 @@ func (s *regAllocState) freeReg(r register) {
if s.f.pass.debug > regDebug {
fmt.Printf("freeReg %s (dump %s/%s)\n", s.registers[r].Name(), v, s.regs[r].c)
}
+ if !resetting && s.f.Config.ctxt.Flag_locationlists && len(s.valueNames[v.ID]) != 0 {
+ kill := s.curBlock.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
+ for int(kill.ID) >= len(s.orig) {
+ s.orig = append(s.orig, nil)
+ }
+ for _, name := range s.valueNames[v.ID] {
+ s.f.NamedValues[name] = append(s.f.NamedValues[name], kill)
+ }
+ s.f.setHome(kill, &s.registers[r])
+ }
s.regs[r] = regState{}
s.values[v.ID].regs &^= regMask(1) << r
s.used &^= regMask(1) << r
@@ -599,6 +619,17 @@ func (s *regAllocState) init(f *Func) {
s.values = make([]valState, f.NumValues())
s.orig = make([]*Value, f.NumValues())
s.copies = make(map[*Value]bool)
+ if s.f.Config.ctxt.Flag_locationlists {
+ s.valueNames = make([][]LocalSlot, f.NumValues())
+ for slot, values := range f.NamedValues {
+ if isSynthetic(&slot) {
+ continue
+ }
+ for _, value := range values {
+ s.valueNames[value.ID] = append(s.valueNames[value.ID], slot)
+ }
+ }
+ }
for _, b := range f.Blocks {
for _, v := range b.Values {
if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
@@ -692,7 +723,9 @@ func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
// Sets the state of the registers to that encoded in regs.
func (s *regAllocState) setState(regs []endReg) {
- s.freeRegs(s.used)
+ for s.used != 0 {
+ s.freeOrResetReg(pickReg(s.used), true)
+ }
for _, x := range regs {
s.assignReg(x.r, x.v, x.c)
}
@@ -735,6 +768,9 @@ func (s *regAllocState) regalloc(f *Func) {
}
for _, b := range f.Blocks {
+ if s.f.pass.debug > regDebug {
+ fmt.Printf("Begin processing block %v\n", b)
+ }
s.curBlock = b
// Initialize regValLiveSet and uses fields for this block.
@@ -830,9 +866,6 @@ func (s *regAllocState) regalloc(f *Func) {
// This is the complicated case. We have more than one predecessor,
// which means we may have Phi ops.
- // Copy phi ops into new schedule.
- b.Values = append(b.Values, phis...)
-
// Start with the final register state of the primary predecessor
idx := s.primary[b.ID]
if idx < 0 {
@@ -910,6 +943,9 @@ func (s *regAllocState) regalloc(f *Func) {
}
}
+ // Copy phi ops into new schedule.
+ b.Values = append(b.Values, phis...)
+
// Third pass - pick registers for phis whose inputs
// were not in a register.
for i, v := range phis {
@@ -1005,7 +1041,7 @@ func (s *regAllocState) regalloc(f *Func) {
pidx := e.i
for _, v := range succ.Values {
if v.Op != OpPhi {
- break
+ continue
}
if !s.values[v.ID].needReg {
continue
@@ -1565,6 +1601,9 @@ func (s *regAllocState) placeSpills() {
for _, b := range f.Blocks {
var m regMask
for _, v := range b.Values {
+ if v.Op == OpRegKill {
+ continue
+ }
if v.Op != OpPhi {
break
}
@@ -1675,7 +1714,7 @@ func (s *regAllocState) placeSpills() {
for _, b := range f.Blocks {
nphi := 0
for _, v := range b.Values {
- if v.Op != OpPhi {
+ if v.Op != OpRegKill && v.Op != OpPhi {
break
}
nphi++
@@ -1800,6 +1839,9 @@ func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive
}
// Phis need their args to end up in a specific location.
for _, v := range e.b.Values {
+ if v.Op == OpRegKill {
+ continue
+ }
if v.Op != OpPhi {
break
}
@@ -1878,6 +1920,7 @@ func (e *edgeState) process() {
if e.s.f.pass.debug > regDebug {
fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc.Name(), c)
}
+ e.erase(r)
if _, isReg := loc.(*Register); isReg {
c = e.p.NewValue1(d.pos, OpCopy, c.Type, c)
} else {
@@ -1943,6 +1986,18 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
}
}
_, dstReg := loc.(*Register)
+
+ // Pre-clobber destination. This avoids the
+ // following situation:
+ // - v is currently held in R0 and stacktmp0.
+ // - We want to copy stacktmp1 to stacktmp0.
+ // - We choose R0 as the temporary register.
+ // During the copy, both R0 and stacktmp0 are
+ // clobbered, losing both copies of v. Oops!
+ // Erasing the destination early means R0 will not
+ // be chosen as the temp register, as it will then
+ // be the last copy of v.
+ e.erase(loc)
var x *Value
if c == nil {
if !e.s.values[vid].rematerializeable {
@@ -1953,8 +2008,8 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
} else {
// Rematerialize into stack slot. Need a free
// register to accomplish this.
- e.erase(loc) // see pre-clobber comment below
r := e.findRegFor(v.Type)
+ e.erase(r)
x = v.copyIntoNoXPos(e.p)
e.set(r, vid, x, false, pos)
// Make sure we spill with the size of the slot, not the
@@ -1976,20 +2031,8 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
} else {
// mem->mem. Use temp register.
-
- // Pre-clobber destination. This avoids the
- // following situation:
- // - v is currently held in R0 and stacktmp0.
- // - We want to copy stacktmp1 to stacktmp0.
- // - We choose R0 as the temporary register.
- // During the copy, both R0 and stacktmp0 are
- // clobbered, losing both copies of v. Oops!
- // Erasing the destination early means R0 will not
- // be chosen as the temp register, as it will then
- // be the last copy of v.
- e.erase(loc)
-
r := e.findRegFor(c.Type)
+ e.erase(r)
t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
e.set(r, vid, t, false, pos)
x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
@@ -2008,7 +2051,6 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
// set changes the contents of location loc to hold the given value and its cached representative.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
e.s.f.setHome(c, loc)
- e.erase(loc)
e.contents[loc] = contentRecord{vid, c, final, pos}
a := e.cache[vid]
if len(a) == 0 {
@@ -2059,6 +2101,16 @@ func (e *edgeState) erase(loc Location) {
fmt.Printf("v%d no longer available in %s:%s\n", vid, loc.Name(), c)
}
a[i], a = a[len(a)-1], a[:len(a)-1]
+ if e.s.f.Config.ctxt.Flag_locationlists {
+ if _, isReg := loc.(*Register); isReg && int(c.ID) < len(e.s.valueNames) && len(e.s.valueNames[c.ID]) != 0 {
+ kill := e.p.NewValue0(src.NoXPos, OpRegKill, types.TypeVoid)
+ e.s.f.setHome(kill, loc)
+ for _, name := range e.s.valueNames[c.ID] {
+ e.s.f.NamedValues[name] = append(e.s.f.NamedValues[name], kill)
+ }
+ }
+ }
+
break
}
}