diff options
author | Brian Kessler <brian.m.kessler@gmail.com> | 2019-03-09 21:58:16 -0700 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2019-04-27 20:46:46 +0000 |
commit | a28a9427688846f971017924bcf6e8cb623ffba4 (patch) | |
tree | 43755dc15e095de118b125337463055da660fc8a /src/cmd/compile/internal/ssa/rewriteMIPS64.go | |
parent | f67f5511ee0f225bcc8943994ba6139eed375e85 (diff) | |
download | go-a28a9427688846f971017924bcf6e8cb623ffba4.tar.gz go-a28a9427688846f971017924bcf6e8cb623ffba4.zip |
cmd/compile: add unsigned divisibility rules
"Division by invariant integers using multiplication" paper
by Granlund and Montgomery contains a method for directly computing
divisibility (x%c == 0 for c constant) by means of the modular inverse.
The method is further elaborated in "Hacker's Delight" by Warren Section 10-17
This general rule can compute divisibility by one multiplication and a compare
for odd divisors and an additional rotate for even divisors.
To apply the divisibility rule, we must take into account
the rules to rewrite x%c = x-((x/c)*c) and (x/c) for c constant on the first
optimization pass "opt". This complicates the matching as we want to match
only in the cases where the result of (x/c) is not also available.
So, we must match on the expanded form of (x/c) in the expression x == c*(x/c)
in the "late opt" pass after common subexpression elimination.
Note, that if there is an intermediate opt pass introduced in the future we
could simplify these rules by delaying the magic division rewrite to "late opt"
and matching directly on (x/c) in the intermediate opt pass.
Additional rules to lower the generic RotateLeft* ops were also applied.
On amd64, the divisibility check is 25-50% faster.
name old time/op new time/op delta
DivconstI64-4 2.08ns ± 0% 2.08ns ± 1% ~ (p=0.881 n=5+5)
DivisibleconstI64-4 2.67ns ± 0% 2.67ns ± 1% ~ (p=1.000 n=5+5)
DivisibleWDivconstI64-4 2.67ns ± 0% 2.67ns ± 0% ~ (p=0.683 n=5+5)
DivconstU64-4 2.08ns ± 1% 2.08ns ± 1% ~ (p=1.000 n=5+5)
DivisibleconstU64-4 2.77ns ± 1% 1.55ns ± 2% -43.90% (p=0.008 n=5+5)
DivisibleWDivconstU64-4 2.99ns ± 1% 2.99ns ± 1% ~ (p=1.000 n=5+5)
DivconstI32-4 1.53ns ± 2% 1.53ns ± 0% ~ (p=1.000 n=5+5)
DivisibleconstI32-4 2.23ns ± 0% 2.25ns ± 3% ~ (p=0.167 n=5+5)
DivisibleWDivconstI32-4 2.27ns ± 1% 2.27ns ± 1% ~ (p=0.429 n=5+5)
DivconstU32-4 1.78ns ± 0% 1.78ns ± 1% ~ (p=1.000 n=4+5)
DivisibleconstU32-4 2.52ns ± 2% 1.26ns ± 0% -49.96% (p=0.000 n=5+4)
DivisibleWDivconstU32-4 2.63ns ± 0% 2.85ns ±10% +8.29% (p=0.016 n=4+5)
DivconstI16-4 1.54ns ± 0% 1.54ns ± 0% ~ (p=0.333 n=4+5)
DivisibleconstI16-4 2.10ns ± 0% 2.10ns ± 1% ~ (p=0.571 n=4+5)
DivisibleWDivconstI16-4 2.22ns ± 0% 2.23ns ± 1% ~ (p=0.556 n=4+5)
DivconstU16-4 1.09ns ± 0% 1.01ns ± 1% -7.74% (p=0.000 n=4+5)
DivisibleconstU16-4 1.83ns ± 0% 1.26ns ± 0% -31.52% (p=0.008 n=5+5)
DivisibleWDivconstU16-4 1.88ns ± 0% 1.89ns ± 1% ~ (p=0.365 n=5+5)
DivconstI8-4 1.54ns ± 1% 1.54ns ± 1% ~ (p=1.000 n=5+5)
DivisibleconstI8-4 2.10ns ± 0% 2.11ns ± 0% ~ (p=0.238 n=5+4)
DivisibleWDivconstI8-4 2.22ns ± 0% 2.23ns ± 2% ~ (p=0.762 n=5+5)
DivconstU8-4 0.92ns ± 1% 0.94ns ± 1% +2.65% (p=0.008 n=5+5)
DivisibleconstU8-4 1.66ns ± 0% 1.26ns ± 1% -24.28% (p=0.008 n=5+5)
DivisibleWDivconstU8-4 1.79ns ± 0% 1.80ns ± 1% ~ (p=0.079 n=4+5)
A follow-up change will address the signed division case.
Updates #30282
Change-Id: I7e995f167179aa5c76bb10fbcbeb49c520943403
Reviewed-on: https://go-review.googlesource.com/c/go/+/168037
Run-TryBot: Brian Kessler <brian.m.kessler@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewriteMIPS64.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/rewriteMIPS64.go | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go index 453b81e75f..93087fb759 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go @@ -471,6 +471,14 @@ func rewriteValueMIPS64(v *Value) bool { return rewriteValueMIPS64_OpOrB_0(v) case OpPanicBounds: return rewriteValueMIPS64_OpPanicBounds_0(v) + case OpRotateLeft16: + return rewriteValueMIPS64_OpRotateLeft16_0(v) + case OpRotateLeft32: + return rewriteValueMIPS64_OpRotateLeft32_0(v) + case OpRotateLeft64: + return rewriteValueMIPS64_OpRotateLeft64_0(v) + case OpRotateLeft8: + return rewriteValueMIPS64_OpRotateLeft8_0(v) case OpRound32F: return rewriteValueMIPS64_OpRound32F_0(v) case OpRound64F: @@ -7471,6 +7479,134 @@ func rewriteValueMIPS64_OpPanicBounds_0(v *Value) bool { } return false } +func rewriteValueMIPS64_OpRotateLeft16_0(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (RotateLeft16 <t> x (MOVVconst [c])) + // cond: + // result: (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15]))) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpMIPS64MOVVconst { + break + } + c := v_1.AuxInt + v.reset(OpOr16) + v0 := b.NewValue0(v.Pos, OpLsh16x64, t) + v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v1.AuxInt = c & 15 + v0.AddArg(v1) + v.AddArg(v0) + v2 := b.NewValue0(v.Pos, OpRsh16Ux64, t) + v2.AddArg(x) + v3 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v3.AuxInt = -c & 15 + v2.AddArg(v3) + v.AddArg(v2) + return true + } + return false +} +func rewriteValueMIPS64_OpRotateLeft32_0(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (RotateLeft32 <t> x (MOVVconst [c])) + // cond: + // result: (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31]))) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op 
!= OpMIPS64MOVVconst { + break + } + c := v_1.AuxInt + v.reset(OpOr32) + v0 := b.NewValue0(v.Pos, OpLsh32x64, t) + v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v1.AuxInt = c & 31 + v0.AddArg(v1) + v.AddArg(v0) + v2 := b.NewValue0(v.Pos, OpRsh32Ux64, t) + v2.AddArg(x) + v3 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v3.AuxInt = -c & 31 + v2.AddArg(v3) + v.AddArg(v2) + return true + } + return false +} +func rewriteValueMIPS64_OpRotateLeft64_0(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (RotateLeft64 <t> x (MOVVconst [c])) + // cond: + // result: (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63]))) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpMIPS64MOVVconst { + break + } + c := v_1.AuxInt + v.reset(OpOr64) + v0 := b.NewValue0(v.Pos, OpLsh64x64, t) + v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v1.AuxInt = c & 63 + v0.AddArg(v1) + v.AddArg(v0) + v2 := b.NewValue0(v.Pos, OpRsh64Ux64, t) + v2.AddArg(x) + v3 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v3.AuxInt = -c & 63 + v2.AddArg(v3) + v.AddArg(v2) + return true + } + return false +} +func rewriteValueMIPS64_OpRotateLeft8_0(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (RotateLeft8 <t> x (MOVVconst [c])) + // cond: + // result: (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7]))) + for { + t := v.Type + _ = v.Args[1] + x := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpMIPS64MOVVconst { + break + } + c := v_1.AuxInt + v.reset(OpOr8) + v0 := b.NewValue0(v.Pos, OpLsh8x64, t) + v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v1.AuxInt = c & 7 + v0.AddArg(v1) + v.AddArg(v0) + v2 := b.NewValue0(v.Pos, OpRsh8Ux64, t) + v2.AddArg(x) + v3 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64) + v3.AuxInt = -c & 7 + v2.AddArg(v3) + v.AddArg(v2) + return true + } + return 
false +} func rewriteValueMIPS64_OpRound32F_0(v *Value) bool { // match: (Round32F x) // cond: |