aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/looprotate.go
diff options
context:
space:
mode:
authorJosh Bleecher Snyder <josharian@gmail.com>2017-05-14 14:52:09 -0700
committerJosh Bleecher Snyder <josharian@gmail.com>2017-05-18 17:58:39 +0000
commit638ebb04f24b49a75211775d839f6cb557993a26 (patch)
treed18305b8a281dececffbe455de52f2b6b2faedf6 /src/cmd/compile/internal/ssa/looprotate.go
parent9a432552cb6dad659dd6ff7e3c9ab9defc5c73d2 (diff)
downloadgo-638ebb04f24b49a75211775d839f6cb557993a26.tar.gz
go-638ebb04f24b49a75211775d839f6cb557993a26.zip
cmd/compile: don't break up contiguous blocks in looprotate
looprotate finds loop headers and arranges for them to be placed after the body of the loop. This eliminates a jump from the body. However, if the loop header is a series of contiguously laid out blocks, the rotation introduces a new jump in that series. This CL expands the "loop header" to move to be the entire run of contiguously laid out blocks in the same loop. This shrinks object files a little, and actually speeds up the compiler noticeably. Numbers below. Fannkuch performance seems to vary a lot by machine. On my laptop: name old time/op new time/op delta Fannkuch11-8 2.89s ± 2% 2.85s ± 3% -1.22% (p=0.000 n=50+50) This has a significant affect on the append benchmarks in #14758: name old time/op new time/op delta Foo-8 312ns ± 3% 276ns ± 2% -11.37% (p=0.000 n=30+29) Bar-8 565ns ± 2% 456ns ± 2% -19.27% (p=0.000 n=27+28) Updates #18977 Fixes #20355 name old time/op new time/op delta Template 205ms ± 5% 204ms ± 8% ~ (p=0.903 n=92+99) Unicode 85.3ms ± 4% 85.1ms ± 3% ~ (p=0.191 n=92+94) GoTypes 512ms ± 4% 507ms ± 4% -0.93% (p=0.000 n=95+97) Compiler 2.38s ± 3% 2.35s ± 3% -1.27% (p=0.000 n=98+95) SSA 4.67s ± 3% 4.64s ± 3% -0.62% (p=0.000 n=95+96) Flate 117ms ± 3% 117ms ± 3% ~ (p=0.099 n=84+86) GoParser 139ms ± 4% 137ms ± 4% -0.90% (p=0.000 n=97+98) Reflect 329ms ± 5% 326ms ± 6% -0.97% (p=0.002 n=99+98) Tar 102ms ± 6% 101ms ± 5% -0.97% (p=0.006 n=97+97) XML 198ms ±10% 196ms ±13% ~ (p=0.087 n=100+100) [Geo mean] 318ms 316ms -0.72% name old user-time/op new user-time/op delta Template 250ms ± 7% 250ms ± 7% ~ (p=0.850 n=94+92) Unicode 107ms ± 8% 106ms ± 5% -0.76% (p=0.005 n=98+91) GoTypes 665ms ± 5% 659ms ± 5% -0.85% (p=0.003 n=93+98) Compiler 3.15s ± 3% 3.10s ± 3% -1.60% (p=0.000 n=99+98) SSA 6.82s ± 3% 6.72s ± 4% -1.55% (p=0.000 n=94+98) Flate 138ms ± 8% 138ms ± 6% ~ (p=0.369 n=94+92) GoParser 170ms ± 5% 168ms ± 6% -1.13% (p=0.002 n=96+98) Reflect 412ms ± 8% 416ms ± 8% ~ (p=0.169 n=100+100) Tar 123ms ±18% 123ms ±14% ~ (p=0.896 n=100+100) XML 236ms ± 9% 234ms ±11% ~ (p=0.124 n=100+100) [Geo mean] 401ms 398ms -0.63% name old alloc/op new alloc/op delta Template 38.8MB ± 0% 38.8MB ± 0% ~ (p=0.222 n=5+5) Unicode 28.7MB ± 0% 28.7MB ± 0% ~ (p=0.421 n=5+5) GoTypes 109MB ± 0% 109MB ± 0% ~ (p=0.056 n=5+5) Compiler 457MB ± 0% 457MB ± 0% +0.07% (p=0.008 n=5+5) SSA 1.10GB ± 0% 1.10GB ± 0% +0.05% (p=0.008 n=5+5) Flate 24.5MB ± 0% 24.5MB ± 0% ~ (p=0.222 n=5+5) GoParser 30.9MB ± 0% 31.0MB ± 0% +0.21% (p=0.016 n=5+5) Reflect 73.4MB ± 0% 73.4MB ± 0% ~ (p=0.421 n=5+5) Tar 25.5MB ± 0% 25.5MB ± 0% ~ (p=0.548 n=5+5) XML 40.9MB ± 0% 40.9MB ± 0% ~ (p=0.151 n=5+5) [Geo mean] 71.6MB 71.6MB +0.07% name old allocs/op new allocs/op delta Template 394k ± 0% 394k ± 0% ~ (p=1.000 n=5+5) Unicode 344k ± 0% 343k ± 0% ~ (p=0.310 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% ~ (p=1.000 n=5+5) Compiler 4.42M ± 0% 4.42M ± 0% ~ (p=1.000 n=5+5) SSA 9.80M ± 0% 9.80M ± 0% ~ (p=0.095 n=5+5) Flate 237k ± 1% 238k ± 1% ~ (p=0.310 n=5+5) GoParser 320k ± 0% 322k ± 1% +0.50% (p=0.032 n=5+5) Reflect 958k ± 0% 957k ± 0% ~ (p=0.548 n=5+5) Tar 252k ± 1% 252k ± 0% ~ (p=1.000 n=5+5) XML 400k ± 0% 400k ± 0% ~ (p=0.841 n=5+5) [Geo mean] 741k 742k +0.06% name old object-bytes new object-bytes delta Template 386k ± 0% 386k ± 0% -0.05% (p=0.008 n=5+5) Unicode 202k ± 0% 202k ± 0% -0.01% (p=0.008 n=5+5) GoTypes 1.16M ± 0% 1.16M ± 0% -0.06% (p=0.008 n=5+5) Compiler 3.91M ± 0% 3.91M ± 0% -0.06% (p=0.008 n=5+5) SSA 7.91M ± 0% 7.92M ± 0% +0.01% (p=0.008 n=5+5) Flate 228k ± 0% 227k ± 0% -0.04% (p=0.008 n=5+5) GoParser 283k ± 0% 283k ± 0% -0.06% (p=0.008 n=5+5) Reflect 952k ± 0% 951k ± 0% -0.02% (p=0.008 n=5+5) Tar 187k ± 0% 187k ± 0% -0.04% (p=0.008 n=5+5) XML 406k ± 0% 406k ± 0% -0.05% (p=0.008 n=5+5) [Geo mean] 648k 648k -0.04% Change-Id: I8630c4291a0eb2f7e7927bc04d7cc0efef181094 Reviewed-on: https://go-review.googlesource.com/43491 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/looprotate.go')
-rw-r--r--src/cmd/compile/internal/ssa/looprotate.go32
1 files changed, 27 insertions, 5 deletions
diff --git a/src/cmd/compile/internal/ssa/looprotate.go b/src/cmd/compile/internal/ssa/looprotate.go
index fc186124df..c5b768264d 100644
--- a/src/cmd/compile/internal/ssa/looprotate.go
+++ b/src/cmd/compile/internal/ssa/looprotate.go
@@ -27,12 +27,17 @@ func loopRotate(f *Func) {
return
}
+ idToIdx := make([]int, f.NumBlocks())
+ for i, b := range f.Blocks {
+ idToIdx[b.ID] = i
+ }
+
// Set of blocks we're moving, by ID.
move := map[ID]struct{}{}
- // Map from block ID to the moving block that should
+ // Map from block ID to the moving blocks that should
// come right after it.
- after := map[ID]*Block{}
+ after := map[ID][]*Block{}
// Check each loop header and decide if we want to move it.
for _, loop := range loopnest.loops {
@@ -50,10 +55,27 @@ func loopRotate(f *Func) {
if p == nil || p == b {
continue
}
+ after[p.ID] = []*Block{b}
+ for {
+ nextIdx := idToIdx[b.ID] + 1
+ if nextIdx >= len(f.Blocks) { // reached end of function (maybe impossible?)
+ break
+ }
+ nextb := f.Blocks[nextIdx]
+ if nextb == p { // original loop precedessor is next
+ break
+ }
+ if loopnest.b2l[nextb.ID] != loop { // about to leave loop
+ break
+ }
+ after[p.ID] = append(after[p.ID], nextb)
+ b = nextb
+ }
// Place b after p.
- move[b.ID] = struct{}{}
- after[p.ID] = b
+ for _, b := range after[p.ID] {
+ move[b.ID] = struct{}{}
+ }
}
// Move blocks to their destinations in a single pass.
@@ -67,7 +89,7 @@ func loopRotate(f *Func) {
}
f.Blocks[j] = b
j++
- if a := after[b.ID]; a != nil {
+ for _, a := range after[b.ID] {
if j > i {
f.Fatalf("head before tail in loop %s", b)
}