aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/flagalloc.go
diff options
context:
space:
mode:
authorBen Shi <powerman1st@163.com>2018-07-29 12:50:50 +0000
committerBen Shi <powerman1st@163.com>2018-08-20 14:23:22 +0000
commit556971316f363233755283518dfda4b1cf5980d8 (patch)
treef90e632fecc48d8f367fcb61e3fc7dbf42872085 /src/cmd/compile/internal/ssa/flagalloc.go
parentb75c5c5992686a7eac1a33383e8b9b14b579556e (diff)
downloadgo-556971316f363233755283518dfda4b1cf5980d8.tar.gz
go-556971316f363233755283518dfda4b1cf5980d8.zip
cmd/compile: optimize 386's comparison
CMPL/CMPW/CMPB can take a memory operand on 386, and this CL implements that optimization. 1. The total size of pkg/linux_386 decreases about 45KB, excluding cmd/compile. 2. The go1 benchmark shows a little improvement. name old time/op new time/op delta BinaryTree17-4 3.36s ± 2% 3.37s ± 3% ~ (p=0.537 n=40+40) Fannkuch11-4 3.59s ± 1% 3.53s ± 2% -1.58% (p=0.000 n=40+40) FmtFprintfEmpty-4 46.0ns ± 3% 45.8ns ± 3% ~ (p=0.249 n=40+40) FmtFprintfString-4 80.0ns ± 4% 78.8ns ± 3% -1.49% (p=0.001 n=40+40) FmtFprintfInt-4 89.7ns ± 2% 90.3ns ± 2% +0.74% (p=0.003 n=40+40) FmtFprintfIntInt-4 144ns ± 3% 143ns ± 3% -0.95% (p=0.003 n=40+40) FmtFprintfPrefixedInt-4 181ns ± 4% 180ns ± 2% ~ (p=0.103 n=40+40) FmtFprintfFloat-4 412ns ± 3% 408ns ± 4% -0.97% (p=0.018 n=40+40) FmtManyArgs-4 607ns ± 4% 605ns ± 4% ~ (p=0.148 n=40+40) GobDecode-4 7.19ms ± 4% 7.24ms ± 5% ~ (p=0.340 n=40+40) GobEncode-4 7.04ms ± 9% 6.99ms ± 9% ~ (p=0.289 n=40+40) Gzip-4 400ms ± 6% 398ms ± 5% ~ (p=0.168 n=40+40) Gunzip-4 41.2ms ± 3% 41.7ms ± 3% +1.40% (p=0.001 n=40+40) HTTPClientServer-4 62.5µs ± 1% 62.1µs ± 2% -0.61% (p=0.000 n=37+37) JSONEncode-4 20.7ms ± 4% 20.4ms ± 3% -1.60% (p=0.000 n=40+40) JSONDecode-4 69.4ms ± 4% 69.2ms ± 6% ~ (p=0.177 n=40+40) Mandelbrot200-4 5.22ms ± 6% 5.21ms ± 3% ~ (p=0.531 n=40+40) GoParse-4 3.29ms ± 3% 3.28ms ± 3% ~ (p=0.321 n=40+39) RegexpMatchEasy0_32-4 104ns ± 4% 103ns ± 7% -0.89% (p=0.040 n=40+40) RegexpMatchEasy0_1K-4 852ns ± 3% 853ns ± 2% ~ (p=0.357 n=40+40) RegexpMatchEasy1_32-4 113ns ± 8% 113ns ± 3% ~ (p=0.906 n=40+40) RegexpMatchEasy1_1K-4 1.03µs ± 4% 1.03µs ± 5% ~ (p=0.326 n=40+40) RegexpMatchMedium_32-4 136ns ± 3% 133ns ± 3% -2.31% (p=0.000 n=40+40) RegexpMatchMedium_1K-4 44.0µs ± 3% 43.7µs ± 3% ~ (p=0.053 n=40+40) RegexpMatchHard_32-4 2.27µs ± 3% 2.26µs ± 4% ~ (p=0.391 n=40+40) RegexpMatchHard_1K-4 68.0µs ± 3% 68.9µs ± 3% +1.28% (p=0.000 n=40+40) Revcomp-4 1.86s ± 5% 1.86s ± 2% ~ (p=0.950 n=40+40) Template-4 73.4ms ± 4% 69.9ms ± 7% -4.78% (p=0.000 n=40+40) TimeParse-4 449ns ± 4% 441ns ± 5% -1.76% (p=0.000 n=40+40) TimeFormat-4 416ns ± 3% 417ns ± 4% ~ (p=0.304 n=40+40) [Geo mean] 67.7µs 67.3µs -0.55% name old speed new speed delta GobDecode-4 107MB/s ± 4% 106MB/s ± 5% ~ (p=0.336 n=40+40) GobEncode-4 109MB/s ± 5% 110MB/s ± 9% ~ (p=0.142 n=38+40) Gzip-4 48.5MB/s ± 5% 48.8MB/s ± 5% ~ (p=0.172 n=40+40) Gunzip-4 472MB/s ± 3% 465MB/s ± 3% -1.39% (p=0.001 n=40+40) JSONEncode-4 93.6MB/s ± 4% 95.1MB/s ± 3% +1.61% (p=0.000 n=40+40) JSONDecode-4 28.0MB/s ± 3% 28.1MB/s ± 6% ~ (p=0.181 n=40+40) GoParse-4 17.6MB/s ± 3% 17.7MB/s ± 3% ~ (p=0.350 n=40+39) RegexpMatchEasy0_32-4 308MB/s ± 4% 311MB/s ± 6% +0.96% (p=0.025 n=40+40) RegexpMatchEasy0_1K-4 1.20GB/s ± 3% 1.20GB/s ± 2% ~ (p=0.317 n=40+40) RegexpMatchEasy1_32-4 282MB/s ± 7% 282MB/s ± 3% ~ (p=0.516 n=40+40) RegexpMatchEasy1_1K-4 994MB/s ± 4% 991MB/s ± 5% ~ (p=0.319 n=40+40) RegexpMatchMedium_32-4 7.31MB/s ± 3% 7.49MB/s ± 3% +2.46% (p=0.000 n=40+40) RegexpMatchMedium_1K-4 23.3MB/s ± 3% 23.4MB/s ± 3% ~ (p=0.052 n=40+40) RegexpMatchHard_32-4 14.1MB/s ± 3% 14.1MB/s ± 4% ~ (p=0.391 n=40+40) RegexpMatchHard_1K-4 15.1MB/s ± 3% 14.9MB/s ± 3% -1.27% (p=0.000 n=40+40) Revcomp-4 137MB/s ± 5% 137MB/s ± 2% ~ (p=0.942 n=40+40) Template-4 26.5MB/s ± 4% 27.8MB/s ± 7% +5.03% (p=0.000 n=40+40) [Geo mean] 78.6MB/s 79.0MB/s +0.57% Change-Id: Idcacc6881ef57cd7dc33aa87b711282842b72a53 Reviewed-on: https://go-review.googlesource.com/126618 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/flagalloc.go')
-rw-r--r--src/cmd/compile/internal/ssa/flagalloc.go68
1 files changed, 46 insertions, 22 deletions
diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go
index 050595ff8d..56c12e320a 100644
--- a/src/cmd/compile/internal/ssa/flagalloc.go
+++ b/src/cmd/compile/internal/ssa/flagalloc.go
@@ -4,6 +4,30 @@
package ssa
+// When breaking up a combined load-compare to separated load and compare operations,
+// opLoad specifies the load operation, and opCmp specifies the compare operation.
+type typeCmdLoadMap struct {
+ opLoad Op
+ opCmp Op
+}
+
+var opCmpLoadMap = map[Op]typeCmdLoadMap{
+ OpAMD64CMPQload: {OpAMD64MOVQload, OpAMD64CMPQ},
+ OpAMD64CMPLload: {OpAMD64MOVLload, OpAMD64CMPL},
+ OpAMD64CMPWload: {OpAMD64MOVWload, OpAMD64CMPW},
+ OpAMD64CMPBload: {OpAMD64MOVBload, OpAMD64CMPB},
+ Op386CMPLload: {Op386MOVLload, Op386CMPL},
+ Op386CMPWload: {Op386MOVWload, Op386CMPW},
+ Op386CMPBload: {Op386MOVBload, Op386CMPB},
+ OpAMD64CMPQconstload: {OpAMD64MOVQload, OpAMD64CMPQconst},
+ OpAMD64CMPLconstload: {OpAMD64MOVLload, OpAMD64CMPLconst},
+ OpAMD64CMPWconstload: {OpAMD64MOVWload, OpAMD64CMPWconst},
+ OpAMD64CMPBconstload: {OpAMD64MOVBload, OpAMD64CMPBconst},
+ Op386CMPLconstload: {Op386MOVLload, Op386CMPLconst},
+ Op386CMPWconstload: {Op386MOVWload, Op386CMPWconst},
+ Op386CMPBconstload: {Op386MOVBload, Op386CMPBconst},
+}
+
// flagalloc allocates the flag register among all the flag-generating
// instructions. Flag values are recomputed if they need to be
// spilled/restored.
@@ -122,55 +146,55 @@ func flagalloc(f *Func) {
if spill[v.ID] && v.MemoryArg() != nil {
switch v.Op {
case OpAMD64CMPQload:
- load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
- v.Op = OpAMD64CMPQ
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt64, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = 0
v.Aux = nil
v.SetArgs2(load, v.Args[1])
- case OpAMD64CMPLload:
- load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
- v.Op = OpAMD64CMPL
+ case OpAMD64CMPLload, Op386CMPLload:
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt32, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = 0
v.Aux = nil
v.SetArgs2(load, v.Args[1])
- case OpAMD64CMPWload:
- load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
- v.Op = OpAMD64CMPW
+ case OpAMD64CMPWload, Op386CMPWload:
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt16, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = 0
v.Aux = nil
v.SetArgs2(load, v.Args[1])
- case OpAMD64CMPBload:
- load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
- v.Op = OpAMD64CMPB
+ case OpAMD64CMPBload, Op386CMPBload:
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt8, v.AuxInt, v.Aux, v.Args[0], v.Args[2])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = 0
v.Aux = nil
v.SetArgs2(load, v.Args[1])
case OpAMD64CMPQconstload:
vo := v.AuxValAndOff()
- load := b.NewValue2IA(v.Pos, OpAMD64MOVQload, f.Config.Types.UInt64, vo.Off(), v.Aux, v.Args[0], v.Args[1])
- v.Op = OpAMD64CMPQconst
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt64, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = vo.Val()
v.Aux = nil
v.SetArgs1(load)
- case OpAMD64CMPLconstload:
+ case OpAMD64CMPLconstload, Op386CMPLconstload:
vo := v.AuxValAndOff()
- load := b.NewValue2IA(v.Pos, OpAMD64MOVLload, f.Config.Types.UInt32, vo.Off(), v.Aux, v.Args[0], v.Args[1])
- v.Op = OpAMD64CMPLconst
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt32, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = vo.Val()
v.Aux = nil
v.SetArgs1(load)
- case OpAMD64CMPWconstload:
+ case OpAMD64CMPWconstload, Op386CMPWconstload:
vo := v.AuxValAndOff()
- load := b.NewValue2IA(v.Pos, OpAMD64MOVWload, f.Config.Types.UInt16, vo.Off(), v.Aux, v.Args[0], v.Args[1])
- v.Op = OpAMD64CMPWconst
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt16, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = vo.Val()
v.Aux = nil
v.SetArgs1(load)
- case OpAMD64CMPBconstload:
+ case OpAMD64CMPBconstload, Op386CMPBconstload:
vo := v.AuxValAndOff()
- load := b.NewValue2IA(v.Pos, OpAMD64MOVBload, f.Config.Types.UInt8, vo.Off(), v.Aux, v.Args[0], v.Args[1])
- v.Op = OpAMD64CMPBconst
+ load := b.NewValue2IA(v.Pos, opCmpLoadMap[v.Op].opLoad, f.Config.Types.UInt8, vo.Off(), v.Aux, v.Args[0], v.Args[1])
+ v.Op = opCmpLoadMap[v.Op].opCmp
v.AuxInt = vo.Val()
v.Aux = nil
v.SetArgs1(load)