diff options
author | Giovanni Bajo <rasky@develer.com> | 2018-03-05 20:59:40 +0100 |
---|---|---|
committer | Giovanni Bajo <rasky@develer.com> | 2018-03-12 18:01:33 +0000 |
commit | 080187f4f72bd6594e3c2efc35cf51bf61378552 (patch) | |
tree | f8bebc0525dc2ac8b48ed997b969a87ee39d5fa8 /src/cmd/compile/internal/ssa/branchelim.go | |
parent | fdf5aaf5555692a9e03a65df1aba06aa4a09052b (diff) | |
download | go-080187f4f72bd6594e3c2efc35cf51bf61378552.tar.gz go-080187f4f72bd6594e3c2efc35cf51bf61378552.zip |
cmd/compile: implement CMOV on amd64
This builds upon the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with FPU instructions to
handle NaN correctly.
Benchmark results on Xeon E5630 (Westmere EP):
name old time/op new time/op delta
BinaryTree17-16 4.99s ± 9% 4.66s ± 2% ~ (p=0.095 n=5+5)
Fannkuch11-16 4.93s ± 3% 5.04s ± 2% ~ (p=0.548 n=5+5)
FmtFprintfEmpty-16 58.8ns ± 7% 61.4ns ±14% ~ (p=0.579 n=5+5)
FmtFprintfString-16 114ns ± 2% 114ns ± 4% ~ (p=0.603 n=5+5)
FmtFprintfInt-16 181ns ± 4% 125ns ± 3% -30.90% (p=0.008 n=5+5)
FmtFprintfIntInt-16 263ns ± 2% 217ns ± 2% -17.34% (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16 230ns ± 1% 212ns ± 1% -7.99% (p=0.008 n=5+5)
FmtFprintfFloat-16 411ns ± 3% 344ns ± 5% -16.43% (p=0.008 n=5+5)
FmtManyArgs-16 828ns ± 4% 790ns ± 2% -4.59% (p=0.032 n=5+5)
GobDecode-16 10.9ms ± 4% 10.8ms ± 5% ~ (p=0.548 n=5+5)
GobEncode-16 9.52ms ± 5% 9.46ms ± 2% ~ (p=1.000 n=5+5)
Gzip-16 334ms ± 2% 337ms ± 2% ~ (p=0.548 n=5+5)
Gunzip-16 64.4ms ± 1% 65.0ms ± 1% +1.00% (p=0.008 n=5+5)
HTTPClientServer-16 156µs ± 3% 155µs ± 3% ~ (p=0.690 n=5+5)
JSONEncode-16 21.0ms ± 1% 21.8ms ± 0% +3.76% (p=0.016 n=5+4)
JSONDecode-16 95.1ms ± 0% 95.7ms ± 1% ~ (p=0.151 n=5+5)
Mandelbrot200-16 6.38ms ± 1% 6.42ms ± 1% ~ (p=0.095 n=5+5)
GoParse-16 5.47ms ± 2% 5.36ms ± 1% -1.95% (p=0.016 n=5+5)
RegexpMatchEasy0_32-16 111ns ± 1% 111ns ± 1% ~ (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16 408ns ± 1% 411ns ± 2% ~ (p=0.087 n=5+5)
RegexpMatchEasy1_32-16 103ns ± 1% 104ns ± 1% ~ (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16 659ns ± 2% 652ns ± 1% ~ (p=0.571 n=5+5)
RegexpMatchMedium_32-16 176ns ± 2% 174ns ± 1% ~ (p=0.476 n=5+5)
RegexpMatchMedium_1K-16 58.6µs ± 4% 57.7µs ± 4% ~ (p=0.548 n=5+5)
RegexpMatchHard_32-16 3.07µs ± 3% 3.04µs ± 4% ~ (p=0.421 n=5+5)
RegexpMatchHard_1K-16 89.2µs ± 1% 87.9µs ± 2% -1.52% (p=0.032 n=5+5)
Revcomp-16 575ms ± 0% 587ms ± 2% +2.12% (p=0.032 n=4+5)
Template-16 110ms ± 1% 107ms ± 3% -3.00% (p=0.032 n=5+5)
TimeParse-16 463ns ± 0% 462ns ± 0% ~ (p=0.810 n=5+4)
TimeFormat-16 538ns ± 0% 535ns ± 0% -0.63% (p=0.024 n=5+5)
name old speed new speed delta
GobDecode-16 70.7MB/s ± 4% 71.4MB/s ± 5% ~ (p=0.452 n=5+5)
GobEncode-16 80.7MB/s ± 5% 81.2MB/s ± 2% ~ (p=1.000 n=5+5)
Gzip-16 58.2MB/s ± 2% 57.7MB/s ± 2% ~ (p=0.452 n=5+5)
Gunzip-16 302MB/s ± 1% 299MB/s ± 1% -0.99% (p=0.008 n=5+5)
JSONEncode-16 92.4MB/s ± 1% 89.1MB/s ± 0% -3.63% (p=0.016 n=5+4)
JSONDecode-16 20.4MB/s ± 0% 20.3MB/s ± 1% ~ (p=0.135 n=5+5)
GoParse-16 10.6MB/s ± 2% 10.8MB/s ± 1% +2.00% (p=0.016 n=5+5)
RegexpMatchEasy0_32-16 286MB/s ± 1% 285MB/s ± 3% ~ (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16 2.51GB/s ± 1% 2.49GB/s ± 2% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32-16 309MB/s ± 1% 307MB/s ± 1% ~ (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16 1.55GB/s ± 2% 1.57GB/s ± 1% ~ (p=0.690 n=5+5)
RegexpMatchMedium_32-16 5.68MB/s ± 2% 5.73MB/s ± 1% ~ (p=0.579 n=5+5)
RegexpMatchMedium_1K-16 17.5MB/s ± 4% 17.8MB/s ± 4% ~ (p=0.500 n=5+5)
RegexpMatchHard_32-16 10.4MB/s ± 3% 10.5MB/s ± 4% ~ (p=0.460 n=5+5)
RegexpMatchHard_1K-16 11.5MB/s ± 1% 11.7MB/s ± 2% +1.57% (p=0.032 n=5+5)
Revcomp-16 442MB/s ± 0% 433MB/s ± 2% -2.05% (p=0.032 n=4+5)
Template-16 17.7MB/s ± 1% 18.2MB/s ± 3% +3.12% (p=0.032 n=5+5)
Change-Id: Ic7cb7374d07da031e771bdcbfdd832fd1b17159c
Reviewed-on: https://go-review.googlesource.com/98695
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/branchelim.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/branchelim.go | 27 |
1 files changed, 21 insertions, 6 deletions
diff --git a/src/cmd/compile/internal/ssa/branchelim.go b/src/cmd/compile/internal/ssa/branchelim.go
index 54508985b3..75a6b8238c 100644
--- a/src/cmd/compile/internal/ssa/branchelim.go
+++ b/src/cmd/compile/internal/ssa/branchelim.go
@@ -19,7 +19,10 @@ package ssa
 // rewrite Phis in the postdominator as CondSelects.
 func branchelim(f *Func) {
 	// FIXME: add support for lowering CondSelects on more architectures
-	if f.Config.arch != "arm64" {
+	switch f.Config.arch {
+	case "arm64", "amd64":
+		// implemented
+	default:
 		return
 	}
 
@@ -32,10 +35,22 @@ func branchelim(f *Func) {
 	}
 }
 
-func canCondSelect(v *Value) bool {
+func canCondSelect(v *Value, arch string) bool {
 	// For now, stick to simple scalars that fit in registers
-	sz := v.Type.Size()
-	return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped())
+	switch {
+	case v.Type.Size() > v.Block.Func.Config.RegSize:
+		return false
+	case v.Type.IsPtrShaped():
+		return true
+	case v.Type.IsInteger():
+		if arch == "amd64" && v.Type.Size() < 2 {
+			// amd64 doesn't support CMOV with byte registers
+			return false
+		}
+		return true
+	default:
+		return false
+	}
 }
 
 func elimIf(f *Func, dom *Block) bool {
@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
 	for _, v := range post.Values {
 		if v.Op == OpPhi {
 			hasphis = true
-			if !canCondSelect(v) {
+			if !canCondSelect(v, f.Config.arch) {
 				return false
 			}
 		}
@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
 	for _, v := range post.Values {
 		if v.Op == OpPhi {
 			hasphis = true
-			if !canCondSelect(v) {
+			if !canCondSelect(v, f.Config.arch) {
 				return false
 			}
 		}