aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/fuse_comparisons.go
diff options
context:
space:
mode:
authorMichael Munday <mike.munday@ibm.com>2019-05-20 11:55:56 -0700
committerMichael Munday <mike.munday@ibm.com>2020-03-03 14:30:26 +0000
commite37cc298636abcd500aa8acc7375d001c431c64e (patch)
tree77ea7a6ef229d1327a49788d1290e5cf73b33734 /src/cmd/compile/internal/ssa/fuse_comparisons.go
parentc9ece81cc8c1a81ebdebcf6dfc13ebf5c4cbdb61 (diff)
downloadgo-e37cc298636abcd500aa8acc7375d001c431c64e.tar.gz
go-e37cc298636abcd500aa8acc7375d001c431c64e.zip
cmd/compile: optimize integer-in-range checks
This CL incorporates code from CL 201206 by Josh Bleecher Snyder (thanks Josh). This CL restores the integer-in-range optimizations in the SSA backend. The fuse pass is enhanced to detect inequalities that could be merged and fuse their associated blocks while the generic rules optimize them into a single unsigned comparison. For example, the inequality `x >= 0 && x < 10` will now be optimized to `unsigned(x) < 10`. Overall has a fairly positive impact on binary sizes. name old time/op new time/op delta Template 192ms ± 1% 192ms ± 1% ~ (p=0.757 n=17+18) Unicode 76.6ms ± 2% 76.5ms ± 2% ~ (p=0.603 n=19+19) GoTypes 694ms ± 1% 693ms ± 1% ~ (p=0.569 n=19+20) Compiler 3.26s ± 0% 3.27s ± 0% +0.25% (p=0.000 n=20+20) SSA 7.41s ± 0% 7.49s ± 0% +1.10% (p=0.000 n=17+19) Flate 120ms ± 1% 120ms ± 1% +0.38% (p=0.003 n=19+19) GoParser 152ms ± 1% 152ms ± 1% ~ (p=0.061 n=17+19) Reflect 422ms ± 1% 425ms ± 2% +0.76% (p=0.001 n=18+20) Tar 167ms ± 1% 167ms ± 0% ~ (p=0.730 n=18+19) XML 233ms ± 4% 231ms ± 1% ~ (p=0.752 n=20+17) LinkCompiler 927ms ± 8% 928ms ± 8% ~ (p=0.857 n=19+20) ExternalLinkCompiler 1.81s ± 2% 1.81s ± 2% ~ (p=0.513 n=19+20) LinkWithoutDebugCompiler 556ms ±10% 583ms ±13% +4.95% (p=0.007 n=20+20) [Geo mean] 478ms 481ms +0.52% name old user-time/op new user-time/op delta Template 270ms ± 5% 269ms ± 7% ~ (p=0.925 n=20+20) Unicode 134ms ± 7% 131ms ±14% ~ (p=0.593 n=18+20) GoTypes 981ms ± 3% 987ms ± 2% +0.63% (p=0.049 n=19+18) Compiler 4.50s ± 2% 4.50s ± 1% ~ (p=0.588 n=19+20) SSA 10.6s ± 2% 10.6s ± 1% ~ (p=0.141 n=20+19) Flate 164ms ± 8% 165ms ±10% ~ (p=0.738 n=20+20) GoParser 202ms ± 5% 203ms ± 6% ~ (p=0.820 n=20+20) Reflect 587ms ± 6% 597ms ± 3% ~ (p=0.087 n=20+18) Tar 230ms ± 6% 228ms ± 8% ~ (p=0.569 n=19+20) XML 311ms ± 6% 314ms ± 5% ~ (p=0.369 n=20+20) LinkCompiler 878ms ± 8% 887ms ± 7% ~ (p=0.289 n=20+20) ExternalLinkCompiler 1.60s ± 7% 1.60s ± 7% ~ (p=0.820 n=20+20) LinkWithoutDebugCompiler 498ms ±12% 489ms ±11% ~ (p=0.398 n=20+20) [Geo mean] 611ms 611ms +0.05% name old alloc/op new alloc/op delta Template 36.1MB ± 0% 36.0MB ± 0% -0.32% (p=0.000 n=20+20) Unicode 28.3MB ± 0% 28.3MB ± 0% -0.03% (p=0.000 n=19+20) GoTypes 121MB ± 0% 121MB ± 0% ~ (p=0.226 n=16+20) Compiler 563MB ± 0% 563MB ± 0% ~ (p=0.166 n=20+19) SSA 1.32GB ± 0% 1.33GB ± 0% +0.88% (p=0.000 n=20+19) Flate 22.7MB ± 0% 22.7MB ± 0% -0.02% (p=0.033 n=19+20) GoParser 27.9MB ± 0% 27.9MB ± 0% -0.02% (p=0.001 n=20+20) Reflect 78.3MB ± 0% 78.2MB ± 0% -0.01% (p=0.019 n=20+20) Tar 34.0MB ± 0% 34.0MB ± 0% -0.04% (p=0.000 n=20+20) XML 43.9MB ± 0% 43.9MB ± 0% -0.07% (p=0.000 n=20+19) LinkCompiler 205MB ± 0% 205MB ± 0% +0.44% (p=0.000 n=20+18) ExternalLinkCompiler 223MB ± 0% 223MB ± 0% +0.03% (p=0.000 n=20+20) LinkWithoutDebugCompiler 139MB ± 0% 142MB ± 0% +1.75% (p=0.000 n=20+20) [Geo mean] 93.7MB 93.9MB +0.20% name old allocs/op new allocs/op delta Template 363k ± 0% 361k ± 0% -0.58% (p=0.000 n=20+19) Unicode 329k ± 0% 329k ± 0% -0.06% (p=0.000 n=19+20) GoTypes 1.28M ± 0% 1.28M ± 0% -0.01% (p=0.000 n=20+20) Compiler 5.40M ± 0% 5.40M ± 0% -0.01% (p=0.000 n=20+20) SSA 12.7M ± 0% 12.8M ± 0% +0.80% (p=0.000 n=20+20) Flate 228k ± 0% 228k ± 0% ~ (p=0.194 n=20+20) GoParser 295k ± 0% 295k ± 0% -0.04% (p=0.000 n=20+20) Reflect 949k ± 0% 949k ± 0% -0.01% (p=0.000 n=20+20) Tar 337k ± 0% 337k ± 0% -0.06% (p=0.000 n=20+20) XML 418k ± 0% 417k ± 0% -0.17% (p=0.000 n=20+20) LinkCompiler 553k ± 0% 554k ± 0% +0.22% (p=0.000 n=20+19) ExternalLinkCompiler 1.52M ± 0% 1.52M ± 0% +0.27% (p=0.000 n=20+20) LinkWithoutDebugCompiler 186k ± 0% 186k ± 0% +0.06% (p=0.000 n=20+20) [Geo mean] 723k 723k +0.03% name old text-bytes new text-bytes delta HelloSize 828kB ± 0% 828kB ± 0% -0.01% (p=0.000 n=20+20) name old data-bytes new data-bytes delta HelloSize 13.4kB ± 0% 13.4kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 180kB ± 0% 180kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 1.23MB ± 0% 1.23MB ± 0% -0.33% (p=0.000 n=20+20) file before after Δ % addr2line 4320075 4311883 -8192 -0.190% asm 5191932 5187836 -4096 -0.079% buildid 2835338 2831242 -4096 -0.144% compile 20531717 20569099 +37382 +0.182% cover 5322511 5318415 -4096 -0.077% dist 3723749 3719653 -4096 -0.110% doc 4743515 4739419 -4096 -0.086% fix 3413960 3409864 -4096 -0.120% link 6690119 6686023 -4096 -0.061% nm 4269616 4265520 -4096 -0.096% pprof 14942189 14929901 -12288 -0.082% trace 11807164 11790780 -16384 -0.139% vet 8384104 8388200 +4096 +0.049% go 15339076 15334980 -4096 -0.027% total 132258257 132226007 -32250 -0.024% Fixes #30645. Change-Id: If551ac5996097f3685870d083151b5843170aab0 Reviewed-on: https://go-review.googlesource.com/c/go/+/165998 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/fuse_comparisons.go')
-rw-r--r--src/cmd/compile/internal/ssa/fuse_comparisons.go157
1 files changed, 157 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/fuse_comparisons.go b/src/cmd/compile/internal/ssa/fuse_comparisons.go
new file mode 100644
index 0000000000..d843fc3fda
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/fuse_comparisons.go
@@ -0,0 +1,157 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// fuseIntegerComparisons optimizes inequalities such as '1 <= x && x < 5',
+// which can be optimized to 'unsigned(x-1) < 4'.
+//
+// Look for branch structure like:
+//
+// p
+// |\
+// | b
+// |/ \
+// s0 s1
+//
+// In our example, p has control '1 <= x', b has control 'x < 5',
+// and s0 and s1 are the if and else results of the comparison.
+//
+// This will be optimized into:
+//
+// p
+// \
+// b
+// / \
+// s0 s1
+//
+// where b has the combined control value 'unsigned(x-1) < 4'.
+// Later passes will then fuse p and b.
+func fuseIntegerComparisons(b *Block) bool {
+ if len(b.Preds) != 1 {
+ return false
+ }
+ p := b.Preds[0].Block()
+ if b.Kind != BlockIf || p.Kind != BlockIf {
+ return false
+ }
+
+ // Don't merge control values if b is likely to be bypassed anyway.
+ if p.Likely == BranchLikely && p.Succs[0].Block() != b {
+ return false
+ }
+ if p.Likely == BranchUnlikely && p.Succs[1].Block() != b {
+ return false
+ }
+
+ // Check if the control values combine to make an integer inequality that
+ // can be further optimized later.
+ bc := b.Controls[0]
+ pc := p.Controls[0]
+ if !areMergeableInequalities(bc, pc) {
+ return false
+ }
+
+ // If the first (true) successors match then we have a disjunction (||).
+ // If the second (false) successors match then we have a conjunction (&&).
+ for i, op := range [2]Op{OpOrB, OpAndB} {
+ if p.Succs[i].Block() != b.Succs[i].Block() {
+ continue
+ }
+
+ // TODO(mundaym): should we also check the cost of executing b?
+ // Currently we might speculatively execute b even if b contains
+ // a lot of instructions. We could just check that len(b.Values)
+ // is lower than a fixed amount. Bear in mind however that the
+ // other optimization passes might yet reduce the cost of b
+ // significantly so we shouldn't be overly conservative.
+ if !canSpeculativelyExecute(b) {
+ return false
+ }
+
+ // Logically combine the control values for p and b.
+ v := b.NewValue0(bc.Pos, op, bc.Type)
+ v.AddArg(pc)
+ v.AddArg(bc)
+
+ // Set the combined control value as the control value for b.
+ b.SetControl(v)
+
+ // Modify p so that it jumps directly to b.
+ p.removeEdge(i)
+ p.Kind = BlockPlain
+ p.Likely = BranchUnknown
+ p.ResetControls()
+
+ return true
+ }
+
+ // TODO: could negate condition(s) to merge controls.
+ return false
+}
+
+// getConstIntArgIndex returns the index of the first argument that is a
+// constant integer or -1 if no such argument exists.
+func getConstIntArgIndex(v *Value) int {
+ for i, a := range v.Args {
+ switch a.Op {
+ case OpConst8, OpConst16, OpConst32, OpConst64:
+ return i
+ }
+ }
+ return -1
+}
+
+// isSignedInequality reports whether op represents the inequality < or ≤
+// in the signed domain.
+func isSignedInequality(v *Value) bool {
+ switch v.Op {
+ case OpLess64, OpLess32, OpLess16, OpLess8,
+ OpLeq64, OpLeq32, OpLeq16, OpLeq8:
+ return true
+ }
+ return false
+}
+
+// isUnsignedInequality reports whether op represents the inequality < or ≤
+// in the unsigned domain.
+func isUnsignedInequality(v *Value) bool {
+ switch v.Op {
+ case OpLess64U, OpLess32U, OpLess16U, OpLess8U,
+ OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
+ return true
+ }
+ return false
+}
+
+func areMergeableInequalities(x, y *Value) bool {
+ // We need both inequalities to be either in the signed or unsigned domain.
+ // TODO(mundaym): it would also be good to merge when we have an Eq op that
+ // could be transformed into a Less/Leq. For example in the unsigned
+ // domain 'x == 0 || 3 < x' is equivalent to 'x <= 0 || 3 < x'
+ inequalityChecks := [...]func(*Value) bool{
+ isSignedInequality,
+ isUnsignedInequality,
+ }
+ for _, f := range inequalityChecks {
+ if !f(x) || !f(y) {
+ continue
+ }
+
+ // Check that both inequalities are comparisons with constants.
+ xi := getConstIntArgIndex(x)
+ if xi < 0 {
+ return false
+ }
+ yi := getConstIntArgIndex(y)
+ if yi < 0 {
+ return false
+ }
+
+ // Check that the non-constant arguments to the inequalities
+ // are the same.
+ return x.Args[xi^1] == y.Args[yi^1]
+ }
+ return false
+}