aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go
diff options
context:
space:
mode:
author: Josh Bleecher Snyder <josharian@gmail.com> 2020-01-20 20:09:41 -0800
committer: Josh Bleecher Snyder <josharian@gmail.com> 2020-02-21 00:55:58 +0000
commit: a3f234c7060b76a4397c2c38dc778a1f82d7fa17 (patch)
tree: 07615787799dd316debc3aaece530922bfa81e14 /src/cmd/compile/internal/ssa/rewriteAMD64splitload.go
parent: a37bbcecca120ed513035a6d8532367daa4e6a95 (diff)
download: go-a3f234c7060b76a4397c2c38dc778a1f82d7fa17.tar.gz
download: go-a3f234c7060b76a4397c2c38dc778a1f82d7fa17.zip
cmd/compile: reduce bounds checks in generated rewrite rules
CL 213703 converted generated rewrite rules for commutative ops to use loops instead of duplicated code.

However, it loaded args using expressions like v.Args[i] and v.Args[i^1], which the compiler could not eliminate bounds checks for (including with all outstanding prove CLs).

Also, given a series of separate rewrite rules for the same op, we generated bounds checks for every rewrite rule, even though we were repeatedly loading the same set of args.

This change reduces both sets of bounds checks.

Instead of loading v.Args[i] and v.Args[i^1] for commutative loops, we now preload v.Args[0] and v.Args[1] into local variables, and then swap them (as needed) in the commutative loop post statement.

And we now load all top level v.Args into local variables at the beginning of every rewrite rule function.

The second optimization is the more significant, but the first helps a little, and they play together nicely from the perspective of generating the code.

This does increase register pressure, but the reduced bounds checks more than compensate.

Note that the vast majority of rewrite rules evaluated are not applied, so the prologue is the most important part of the rewrite rules.

There is one subtle aspect to the new generated code. Because the top level v.Args are shared across rewrite rules, and rule evaluation can swap v_0 and v_1, v_0 and v_1 can end up being swapped from one rule to the next. That is OK, because any time a rule does not get applied, they will have been swapped exactly twice.

Passes toolstash-check -all.
name        old time/op       new time/op       delta
Template    213ms ± 2%        211ms ± 2%        -0.85%  (p=0.000 n=92+96)
Unicode     83.5ms ± 2%       83.2ms ± 2%       -0.41%  (p=0.004 n=95+90)
GoTypes     737ms ± 2%        733ms ± 2%        -0.51%  (p=0.000 n=91+94)
Compiler    3.45s ± 2%        3.43s ± 2%        -0.44%  (p=0.000 n=99+100)
SSA         8.54s ± 1%        8.32s ± 2%        -2.56%  (p=0.000 n=96+99)
Flate       136ms ± 2%        135ms ± 1%        -0.47%  (p=0.000 n=96+96)
GoParser    169ms ± 1%        168ms ± 1%        -0.33%  (p=0.000 n=96+93)
Reflect     456ms ± 3%        455ms ± 3%        ~       (p=0.261 n=95+94)
Tar         186ms ± 2%        185ms ± 2%        -0.48%  (p=0.000 n=94+95)
XML         251ms ± 1%        250ms ± 1%        -0.51%  (p=0.000 n=91+94)
[Geo mean]  424ms             421ms             -0.68%

name        old user-time/op  new user-time/op  delta
Template    275ms ± 1%        274ms ± 2%        -0.55%  (p=0.000 n=95+98)
Unicode     118ms ± 4%        118ms ± 4%        ~       (p=0.642 n=98+90)
GoTypes     983ms ± 1%        980ms ± 1%        -0.30%  (p=0.000 n=93+93)
Compiler    4.56s ± 6%        4.52s ± 6%        -0.72%  (p=0.003 n=100+100)
SSA         11.4s ± 1%        11.1s ± 1%        -2.50%  (p=0.000 n=96+97)
Flate       168ms ± 1%        167ms ± 1%        -0.49%  (p=0.000 n=92+92)
GoParser    204ms ± 1%        204ms ± 2%        -0.27%  (p=0.003 n=99+96)
Reflect     599ms ± 2%        598ms ± 2%        ~       (p=0.116 n=95+92)
Tar         227ms ± 2%        225ms ± 2%        -0.57%  (p=0.000 n=95+98)
XML         313ms ± 2%        312ms ± 1%        -0.37%  (p=0.000 n=89+95)
[Geo mean]  547ms             544ms             -0.61%

file      before     after      Δ      %
compile   21113112   21109016   -4096  -0.019%
total     131704940  131700844  -4096  -0.003%

Change-Id: Id6c39e0367e597c0c75b8a4b1eb14cc3cbd11956
Reviewed-on: https://go-review.googlesource.com/c/go/+/216218
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewriteAMD64splitload.go')
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64splitload.go | 60
1 files changed, 40 insertions, 20 deletions
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go b/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go
index a7aa51268d..ec7d2270b3 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64splitload.go
@@ -25,6 +25,8 @@ func rewriteValueAMD64splitload(v *Value) bool {
return false
}
func rewriteValueAMD64splitload_OpAMD64CMPBconstload_0(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPBconstload {sym} [vo] ptr mem)
@@ -32,8 +34,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPBconstload_0(v *Value) bool {
for {
vo := v.AuxInt
sym := v.Aux
- mem := v.Args[1]
- ptr := v.Args[0]
+ ptr := v_0
+ mem := v_1
v.reset(OpAMD64CMPBconst)
v.AuxInt = valOnly(vo)
v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
@@ -46,6 +48,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPBconstload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPBload_0(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPBload {sym} [off] ptr x mem)
@@ -53,9 +58,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPBload_0(v *Value) bool {
for {
off := v.AuxInt
sym := v.Aux
- mem := v.Args[2]
- ptr := v.Args[0]
- x := v.Args[1]
+ ptr := v_0
+ x := v_1
+ mem := v_2
v.reset(OpAMD64CMPB)
v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
v0.AuxInt = off
@@ -68,6 +73,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPBload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPLconstload_0(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPLconstload {sym} [vo] ptr mem)
@@ -75,8 +82,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPLconstload_0(v *Value) bool {
for {
vo := v.AuxInt
sym := v.Aux
- mem := v.Args[1]
- ptr := v.Args[0]
+ ptr := v_0
+ mem := v_1
v.reset(OpAMD64CMPLconst)
v.AuxInt = valOnly(vo)
v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
@@ -89,6 +96,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPLconstload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPLload_0(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPLload {sym} [off] ptr x mem)
@@ -96,9 +106,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPLload_0(v *Value) bool {
for {
off := v.AuxInt
sym := v.Aux
- mem := v.Args[2]
- ptr := v.Args[0]
- x := v.Args[1]
+ ptr := v_0
+ x := v_1
+ mem := v_2
v.reset(OpAMD64CMPL)
v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
v0.AuxInt = off
@@ -111,6 +121,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPLload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPQconstload_0(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPQconstload {sym} [vo] ptr mem)
@@ -118,8 +130,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPQconstload_0(v *Value) bool {
for {
vo := v.AuxInt
sym := v.Aux
- mem := v.Args[1]
- ptr := v.Args[0]
+ ptr := v_0
+ mem := v_1
v.reset(OpAMD64CMPQconst)
v.AuxInt = valOnly(vo)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
@@ -132,6 +144,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPQconstload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPQload_0(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPQload {sym} [off] ptr x mem)
@@ -139,9 +154,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPQload_0(v *Value) bool {
for {
off := v.AuxInt
sym := v.Aux
- mem := v.Args[2]
- ptr := v.Args[0]
- x := v.Args[1]
+ ptr := v_0
+ x := v_1
+ mem := v_2
v.reset(OpAMD64CMPQ)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v0.AuxInt = off
@@ -154,6 +169,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPQload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPWconstload_0(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPWconstload {sym} [vo] ptr mem)
@@ -161,8 +178,8 @@ func rewriteValueAMD64splitload_OpAMD64CMPWconstload_0(v *Value) bool {
for {
vo := v.AuxInt
sym := v.Aux
- mem := v.Args[1]
- ptr := v.Args[0]
+ ptr := v_0
+ mem := v_1
v.reset(OpAMD64CMPWconst)
v.AuxInt = valOnly(vo)
v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
@@ -175,6 +192,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPWconstload_0(v *Value) bool {
}
}
func rewriteValueAMD64splitload_OpAMD64CMPWload_0(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (CMPWload {sym} [off] ptr x mem)
@@ -182,9 +202,9 @@ func rewriteValueAMD64splitload_OpAMD64CMPWload_0(v *Value) bool {
for {
off := v.AuxInt
sym := v.Aux
- mem := v.Args[2]
- ptr := v.Args[0]
- x := v.Args[1]
+ ptr := v_0
+ x := v_1
+ mem := v_2
v.reset(OpAMD64CMPW)
v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
v0.AuxInt = off