author    Keith Randall <khr@golang.org>  2016-07-18 15:52:59 -0700
committer Keith Randall <khr@golang.org>  2016-07-19 15:16:23 +0000
commit    4a33af6bb63eaa69a4a2cc0d4f222d37d7531b9c
tree      69a8c8e882aa9516e62d475d8540a459a90a28f4
parent    1b0404c4cab18bae9c9e11d0699a1aeb32f08908
[dev.ssa] cmd/compile: more 386 port changes
Fix up zero/move code, including duff calls and rep movs.
Handle the new ops generated by dec64.rules.
Fix constant shifts.

Change-Id: I7d89194b29b04311bfafa0fd93b9f5644af04df9
Reviewed-on: https://go-review.googlesource.com/25033
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
-rw-r--r--  src/cmd/compile/internal/ssa/gen/386.rules   |  79
-rw-r--r--  src/cmd/compile/internal/ssa/gen/386Ops.go   |   9
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules |   4
-rw-r--r--  src/cmd/compile/internal/ssa/gen/dec64.rules |   2
-rw-r--r--  src/cmd/compile/internal/ssa/opGen.go        |  44
-rw-r--r--  src/cmd/compile/internal/ssa/rewrite.go      |  45
-rw-r--r--  src/cmd/compile/internal/ssa/rewrite386.go   | 606
-rw-r--r--  src/cmd/compile/internal/ssa/rewriteAMD64.go |   6
-rw-r--r--  src/cmd/compile/internal/x86/ssa.go          |  29
9 files changed, 550 insertions(+), 274 deletions(-)
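
As a reading aid, here is an illustrative sketch (not code from this CL) of the DUFFCOPY entry-point arithmetic the new 386 rules below use: each copy block in duffcopy is 10 bytes of code and moves 4 bytes of data, and the routine contains 128 such blocks, so a copy of s bytes starts 10*(128 - s/4) bytes into it.

    // duffCopyOffset386 mirrors the AuxInt computed by the new 386 Move rule;
    // the name and the standalone function are hypothetical.
    func duffCopyOffset386(s int64) int64 {
        if s <= 0 || s > 4*128 || s%4 != 0 {
            panic("bad duffcopy size")
        }
        return 10 * (128 - s/4) // bytes to skip from the top of duffcopy
    }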
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 6569533b77..0587be4367 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -83,8 +83,7 @@
(Not x) -> (XORLconst [1] x)
// Lowering pointer arithmetic
-(OffPtr [off] ptr) && is32Bit(off) -> (ADDLconst [off] ptr)
-(OffPtr [off] ptr) -> (ADDL (MOVLconst [off]) ptr)
+(OffPtr [off] ptr) -> (ADDLconst [off] ptr)
(Bswap32 x) -> (BSWAPL x)
@@ -99,6 +98,9 @@
(ZeroExt8to32 x) -> (MOVBLZX x)
(ZeroExt16to32 x) -> (MOVWLZX x)
+(Signmask x) -> (SARLconst x [31])
+(Zeromask x) -> (SBBLcarrymask (CMPL (MOVLconst [0]) x))
+
// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 x) -> x
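
The two new mask ops above have simple scalar meanings; a hedged Go sketch of what the lowerings compute (helper names are hypothetical):

    // signmask: all ones if x is negative, else zero — the SARLconst x [31].
    func signmask(x int32) int32 { return x >> 31 }

    // zeromask: all ones if x is nonzero, else zero. CMPL computes 0 - x,
    // which borrows (sets the carry) exactly when x != 0, and SBBLcarrymask
    // turns that carry into 0 or -1.
    func zeromask(x uint32) uint32 {
        if x != 0 {
            return ^uint32(0)
        }
        return 0
    }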
@@ -161,6 +163,26 @@
(Rsh8x16 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
(Rsh8x8 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
+// constant shifts
+// generic opt rewrites all constant shifts to shift by Const64
+(Lsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SHLLconst x [c])
+(Rsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SARLconst x [c])
+(Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 -> (SHRLconst x [c])
+(Lsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SHLLconst x [c])
+(Rsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SARWconst x [c])
+(Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 -> (SHRWconst x [c])
+(Lsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SHLLconst x [c])
+(Rsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SARBconst x [c])
+(Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 -> (SHRBconst x [c])
+
+// large constant shifts
+(Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
+(Rsh32Ux64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
+(Lsh16x64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
+(Rsh16Ux64 _ (Const64 [c])) && uint64(c) >= 16 -> (Const16 [0])
+(Lsh8x64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
+(Rsh8Ux64 _ (Const64 [c])) && uint64(c) >= 8 -> (Const8 [0])
+
// Lowering comparisons
(Less32 x y) -> (SETL (CMPL x y))
(Less16 x y) -> (SETL (CMPW x y))
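
For context on the constant-shift rules above: Go defines shifts by counts at or beyond the operand width, so the "large constant shifts" cases can fold straight to zero. A minimal demonstration:

    // Both results are 0 by the Go spec, matching the Const16/Const32 [0]
    // results in the rules above (unsigned and left shifts only; the signed
    // right-shift rules are limited to c < width).
    func demoLargeShifts() (uint16, uint32) {
        var a uint16 = 0xFFFF
        var b uint32 = 1
        return a >> 16, b << 40
    }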
@@ -241,7 +263,6 @@
(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBload src mem) mem)
(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 -> (MOVWstore dst (MOVWload src mem) mem)
(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 -> (MOVLstore dst (MOVLload src mem) mem)
-(Move [s] dst src mem) && SizeAndAlign(s).Size() == 16 -> (MOVOstore dst (MOVOload src mem) mem)
(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 ->
(MOVBstore [2] dst (MOVBload [2] src mem)
(MOVWstore dst (MOVWload src mem) mem))
@@ -254,21 +275,32 @@
(Move [s] dst src mem) && SizeAndAlign(s).Size() == 7 ->
(MOVLstore [3] dst (MOVLload [3] src mem)
(MOVLstore dst (MOVLload src mem) mem))
+(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 ->
+ (MOVLstore [4] dst (MOVLload [4] src mem)
+ (MOVLstore dst (MOVLload src mem) mem))
+
+// Adjust moves to be a multiple of 4 bytes.
+(Move [s] dst src mem)
+ && SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0 ->
+ (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%4]
+ (ADDLconst <dst.Type> dst [SizeAndAlign(s).Size()%4])
+ (ADDLconst <src.Type> src [SizeAndAlign(s).Size()%4])
+ (MOVLstore dst (MOVLload src mem) mem))
// Medium copying uses a duff device.
(Move [s] dst src mem)
- && SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0
+ && SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0
&& !config.noDuffDevice ->
- (DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
-// 14 and 64 are magic constants. 14 is the number of bytes to encode:
-// MOVUPS (SI), X0
-// ADDL $16, SI
-// MOVUPS X0, (DI)
-// ADDL $16, DI
-// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
+ (DUFFCOPY [10*(128-SizeAndAlign(s).Size()/4)] dst src mem)
+// 10 and 128 are magic constants. 10 is the number of bytes to encode:
+// MOVL (SI), CX
+// ADDL $4, SI
+// MOVL CX, (DI)
+// ADDL $4, DI
+// and 128 is the number of such blocks. See src/runtime/duff_386.s:duffcopy.
// Large copying uses REP MOVSL.
-(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0 ->
+(Move [s] dst src mem) && (SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0 ->
(REPMOVSL dst src (MOVLconst [SizeAndAlign(s).Size()/4]) mem)
// Lowering Zero instructions
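
The "Adjust moves" rule above uses an overlap trick: for n > 8 with r = n%4 != 0, it stores 4 bytes at offset 0, then recursively moves the remaining n-r bytes starting at offset r; the two regions overlap by 4-r bytes, which is harmless for a copy between distinct objects. A sketch with hypothetical helpers:

    func moveUnaligned(dst, src []byte, n int) {
        copy(dst[:4], src[:4]) // the MOVLstore dst (MOVLload src mem) in the rule
        r := n % 4
        copy(dst[r:n], src[r:n]) // the recursive Move: n-r bytes, size now 4-aligned
    }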
@@ -309,11 +341,22 @@
(MOVLstoreconst [makeValAndOff(0,4)] destptr
(MOVLstoreconst [0] destptr mem))))
+// Medium zeroing uses a duff device.
+(Zero [s] destptr mem)
+ && SizeAndAlign(s).Size() > 16
+ && SizeAndAlign(s).Size() <= 4*128
+ && SizeAndAlign(s).Size()%4 == 0
+ && !config.noDuffDevice ->
+ (DUFFZERO [1*(128-SizeAndAlign(s).Size()/4)] destptr (MOVLconst [0]) mem)
+// 1 and 128 are magic constants. 1 is the number of bytes to encode STOSL.
+// 128 is the number of STOSL instructions in duffzero.
+// See src/runtime/duff_386.s:duffzero.
+
// Large zeroing uses REP STOSL.
(Zero [s] destptr mem)
- && (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32))
- && SizeAndAlign(s).Size()%8 == 0 ->
- (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/8]) (MOVLconst [0]) mem)
+ && (SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16))
+ && SizeAndAlign(s).Size()%4 == 0 ->
+ (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/4]) (MOVLconst [0]) mem)
// Lowering constants
(Const8 [val]) -> (MOVLconst [val])
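
Similarly for zeroing: each STOSL in the 386 duffzero is 1 byte of code and clears 4 bytes, with 128 in total. An illustrative sketch of the DUFFZERO AuxInt above (hypothetical function name):

    func duffZeroOffset386(s int64) int64 {
        if s <= 0 || s > 4*128 || s%4 != 0 {
            panic("bad duffzero size")
        }
        return 1 * (128 - s/4) // one byte of code per remaining 4-byte clear
    }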
@@ -596,14 +639,12 @@
(MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
(MOVSSload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVSSload [off1+off2] {sym} ptr mem)
(MOVSDload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVSDload [off1+off2] {sym} ptr mem)
-(MOVOload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVOload [off1+off2] {sym} ptr mem)
(MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem)
(MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
(MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
(MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSSstore [off1+off2] {sym} ptr val mem)
(MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSDstore [off1+off2] {sym} ptr val mem)
-(MOVOstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVOstore [off1+off2] {sym} ptr val mem)
// Fold constants into stores.
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
@@ -633,8 +674,6 @@
(MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVOload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVBLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -651,8 +690,6 @@
(MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVOstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
diff --git a/src/cmd/compile/internal/ssa/gen/386Ops.go b/src/cmd/compile/internal/ssa/gen/386Ops.go
index 68bcfa9649..49c4cd49e4 100644
--- a/src/cmd/compile/internal/ssa/gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/386Ops.go
@@ -330,8 +330,6 @@ func init() {
{name: "MOVBstore", argLength: 3, reg: gpstore, asm: "MOVB", aux: "SymOff", typ: "Mem"}, // store byte in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVWstore", argLength: 3, reg: gpstore, asm: "MOVW", aux: "SymOff", typ: "Mem"}, // store 2 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVLstore", argLength: 3, reg: gpstore, asm: "MOVL", aux: "SymOff", typ: "Mem"}, // store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
- {name: "MOVOload", argLength: 2, reg: fpload, asm: "MOVUPS", aux: "SymOff", typ: "Int128"}, // load 16 bytes from arg0+auxint+aux. arg1=mem
- {name: "MOVOstore", argLength: 3, reg: fpstore, asm: "MOVUPS", aux: "SymOff", typ: "Mem"}, // store 16 bytes in arg1 to arg0+auxint+aux. arg2=mem
// indexed loads/stores
{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, asm: "MOVBLZX", aux: "SymOff"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
@@ -360,7 +358,7 @@ func init() {
{name: "MOVLstoreconstidx1", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... arg1 ...
{name: "MOVLstoreconstidx4", argLength: 3, reg: gpstoreconstidx, asm: "MOVL", aux: "SymValAndOff", typ: "Mem"}, // store low 4 bytes of ... 4*arg1 ...
- // arg0 = (duff-adjusted) pointer to start of memory to zero
+ // arg0 = pointer to start of memory to zero
// arg1 = value to store (will always be zero)
// arg2 = mem
// auxint = offset into duffzero code to start executing
@@ -370,11 +368,10 @@ func init() {
aux: "Int64",
argLength: 3,
reg: regInfo{
- inputs: []regMask{buildReg("DI"), buildReg("X0")},
+ inputs: []regMask{buildReg("DI"), buildReg("AX")},
clobbers: buildReg("DI FLAGS"),
},
},
- {name: "MOVOconst", reg: regInfo{nil, 0, []regMask{fp}}, typ: "Int128", aux: "Int128", rematerializeable: true},
// arg0 = address of memory to zero
// arg1 = # of 4-byte words to zero
@@ -407,7 +404,7 @@ func init() {
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("SI")},
- clobbers: buildReg("DI SI X0 FLAGS"), // uses X0 as a temporary
+ clobbers: buildReg("DI SI CX FLAGS"), // uses CX as a temporary
},
},
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index b429b6f627..811e810f15 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -400,8 +400,8 @@
(Zero [SizeAndAlign(s).Size()-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
(Zero [s] destptr mem)
&& SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice ->
- (DUFFZERO [duffStart(SizeAndAlign(s).Size())]
- (ADDQconst [duffAdj(SizeAndAlign(s).Size())] destptr) (MOVOconst [0])
+ (DUFFZERO [duffStartAMD64(SizeAndAlign(s).Size())]
+ (ADDQconst [duffAdjAMD64(SizeAndAlign(s).Size())] destptr) (MOVOconst [0])
mem)
// Large zeroing uses REP STOSQ.
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 47e2933872..8b2fd27669 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// This file contains rules to decompose [u]int32 types on 32-bit
+// This file contains rules to decompose [u]int64 types on 32-bit
// architectures. These rules work together with the decomposeBuiltIn
// pass which handles phis of these types.
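
To make "decompose" concrete: on a 32-bit target a 64-bit value lives as a hi/lo pair of 32-bit words, and, for example, a 64-bit add becomes an add of the low words plus an add-with-carry of the high words (ADDL + ADCL on 386). A hypothetical sketch, not code from dec64.rules:

    type pair struct {
        hi int32  // high 32 bits
        lo uint32 // low 32 bits
    }

    func add64(a, b pair) pair {
        lo := a.lo + b.lo
        var carry int32
        if lo < a.lo { // the low-word add overflowed
            carry = 1
        }
        return pair{hi: a.hi + b.hi + carry, lo: lo}
    }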
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 47cfda86b5..a09e736b79 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -285,8 +285,6 @@ const (
Op386MOVBstore
Op386MOVWstore
Op386MOVLstore
- Op386MOVOload
- Op386MOVOstore
Op386MOVBloadidx1
Op386MOVWloadidx1
Op386MOVWloadidx2
@@ -306,7 +304,6 @@ const (
Op386MOVLstoreconstidx1
Op386MOVLstoreconstidx4
Op386DUFFZERO
- Op386MOVOconst
Op386REPSTOSL
Op386CALLstatic
Op386CALLclosure
@@ -3153,32 +3150,6 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "MOVOload",
- auxType: auxSymOff,
- argLen: 2,
- asm: x86.AMOVUPS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 65791}, // AX CX DX BX SP BP SI DI SB
- },
- outputs: []outputInfo{
- {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
- },
- },
- },
- {
- name: "MOVOstore",
- auxType: auxSymOff,
- argLen: 3,
- asm: x86.AMOVUPS,
- reg: regInfo{
- inputs: []inputInfo{
- {1, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
- {0, 65791}, // AX CX DX BX SP BP SI DI SB
- },
- },
- },
- {
name: "MOVBloadidx1",
auxType: auxSymOff,
argLen: 3,
@@ -3418,23 +3389,12 @@ var opcodeTable = [...]opInfo{
reg: regInfo{
inputs: []inputInfo{
{0, 128}, // DI
- {1, 256}, // X0
+ {1, 1}, // AX
},
clobbers: 131200, // DI FLAGS
},
},
{
- name: "MOVOconst",
- auxType: auxInt128,
- argLen: 0,
- rematerializeable: true,
- reg: regInfo{
- outputs: []outputInfo{
- {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
- },
- },
- },
- {
name: "REPSTOSL",
argLen: 4,
reg: regInfo{
@@ -3502,7 +3462,7 @@ var opcodeTable = [...]opInfo{
{0, 128}, // DI
{1, 64}, // SI
},
- clobbers: 131520, // SI DI X0 FLAGS
+ clobbers: 131266, // CX SI DI FLAGS
},
},
{
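
The clobber sets are register bitmasks, one bit per register in the 386 backend's numbering; decoding the constants above (bit positions inferred from the inputInfo comments in this file):

    const (
        maskCX    = 1 << 1  // CX
        maskSI    = 1 << 6  // SI ({1, 64} above)
        maskDI    = 1 << 7  // DI ({0, 128} above)
        maskX0    = 1 << 8  // X0 ({1, 256} in the old DUFFZERO input)
        maskFLAGS = 1 << 17 // FLAGS
    )

    // maskCX|maskSI|maskDI|maskFLAGS == 131266, the new REPMOVSL clobbers;
    // the old value 131520 was maskSI|maskDI|maskX0|maskFLAGS.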
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 03c38827cc..09798eb1bd 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -254,39 +254,38 @@ func isSamePtr(p1, p2 *Value) bool {
return false
}
-// DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
-// See runtime/mkduff.go.
-const (
- dzBlocks = 16 // number of MOV/ADD blocks
- dzBlockLen = 4 // number of clears per block
- dzBlockSize = 19 // size of instructions in a single block
- dzMovSize = 4 // size of single MOV instruction w/ offset
- dzAddSize = 4 // size of single ADD instruction
- dzClearStep = 16 // number of bytes cleared by each MOV instruction
-
- dzTailLen = 4 // number of final STOSQ instructions
- dzTailSize = 2 // size of single STOSQ instruction
-
- dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
- dzSize = dzBlocks * dzBlockSize
-)
-
-func duffStart(size int64) int64 {
- x, _ := duff(size)
+func duffStartAMD64(size int64) int64 {
+ x, _ := duffAMD64(size)
return x
}
-func duffAdj(size int64) int64 {
- _, x := duff(size)
+func duffAdjAMD64(size int64) int64 {
+ _, x := duffAMD64(size)
return x
}
// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
-func duff(size int64) (int64, int64) {
+func duffAMD64(size int64) (int64, int64) {
+ // DUFFZERO consists of repeated blocks of 4 MOVUPSs + ADD,
+ // See runtime/mkduff.go.
+ const (
+ dzBlocks = 16 // number of MOV/ADD blocks
+ dzBlockLen = 4 // number of clears per block
+ dzBlockSize = 19 // size of instructions in a single block
+ dzMovSize = 4 // size of single MOV instruction w/ offset
+ dzAddSize = 4 // size of single ADD instruction
+ dzClearStep = 16 // number of bytes cleared by each MOV instruction
+
+ dzTailLen = 4 // number of final STOSQ instructions
+ dzTailSize = 2 // size of single STOSQ instruction
+
+ dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
+ dzSize = dzBlocks * dzBlockSize
+ )
+
if size < 32 || size > 1024 || size%dzClearStep != 0 {
panic("bad duffzero size")
}
- // TODO: arch-dependent
steps := size / dzClearStep
blocks := steps / dzBlockLen
steps %= dzBlockLen
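
A quick consistency check of the constants now scoped inside duffAMD64 (illustrative only):

    func maxDuffZeroAMD64() int64 {
        const (
            dzBlocks    = 16 // MOV/ADD blocks
            dzBlockLen  = 4  // clears per block
            dzClearStep = 16 // bytes cleared per MOV
        )
        // 16 blocks * 4 clears * 16 bytes = 1024 bytes, exactly the upper
        // bound enforced by the size guard above.
        return dzBlocks * dzBlockLen * dzClearStep
    }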
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index f3f021493d..5d571c588f 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -240,18 +240,24 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_OpLsh16x16(v, config)
case OpLsh16x32:
return rewriteValue386_OpLsh16x32(v, config)
+ case OpLsh16x64:
+ return rewriteValue386_OpLsh16x64(v, config)
case OpLsh16x8:
return rewriteValue386_OpLsh16x8(v, config)
case OpLsh32x16:
return rewriteValue386_OpLsh32x16(v, config)
case OpLsh32x32:
return rewriteValue386_OpLsh32x32(v, config)
+ case OpLsh32x64:
+ return rewriteValue386_OpLsh32x64(v, config)
case OpLsh32x8:
return rewriteValue386_OpLsh32x8(v, config)
case OpLsh8x16:
return rewriteValue386_OpLsh8x16(v, config)
case OpLsh8x32:
return rewriteValue386_OpLsh8x32(v, config)
+ case OpLsh8x64:
+ return rewriteValue386_OpLsh8x64(v, config)
case OpLsh8x8:
return rewriteValue386_OpLsh8x8(v, config)
case Op386MOVBLSX:
@@ -290,10 +296,6 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_Op386MOVLstoreidx1(v, config)
case Op386MOVLstoreidx4:
return rewriteValue386_Op386MOVLstoreidx4(v, config)
- case Op386MOVOload:
- return rewriteValue386_Op386MOVOload(v, config)
- case Op386MOVOstore:
- return rewriteValue386_Op386MOVOstore(v, config)
case Op386MOVSDload:
return rewriteValue386_Op386MOVSDload(v, config)
case Op386MOVSDloadidx1:
@@ -428,36 +430,48 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_OpRsh16Ux16(v, config)
case OpRsh16Ux32:
return rewriteValue386_OpRsh16Ux32(v, config)
+ case OpRsh16Ux64:
+ return rewriteValue386_OpRsh16Ux64(v, config)
case OpRsh16Ux8:
return rewriteValue386_OpRsh16Ux8(v, config)
case OpRsh16x16:
return rewriteValue386_OpRsh16x16(v, config)
case OpRsh16x32:
return rewriteValue386_OpRsh16x32(v, config)
+ case OpRsh16x64:
+ return rewriteValue386_OpRsh16x64(v, config)
case OpRsh16x8:
return rewriteValue386_OpRsh16x8(v, config)
case OpRsh32Ux16:
return rewriteValue386_OpRsh32Ux16(v, config)
case OpRsh32Ux32:
return rewriteValue386_OpRsh32Ux32(v, config)
+ case OpRsh32Ux64:
+ return rewriteValue386_OpRsh32Ux64(v, config)
case OpRsh32Ux8:
return rewriteValue386_OpRsh32Ux8(v, config)
case OpRsh32x16:
return rewriteValue386_OpRsh32x16(v, config)
case OpRsh32x32:
return rewriteValue386_OpRsh32x32(v, config)
+ case OpRsh32x64:
+ return rewriteValue386_OpRsh32x64(v, config)
case OpRsh32x8:
return rewriteValue386_OpRsh32x8(v, config)
case OpRsh8Ux16:
return rewriteValue386_OpRsh8Ux16(v, config)
case OpRsh8Ux32:
return rewriteValue386_OpRsh8Ux32(v, config)
+ case OpRsh8Ux64:
+ return rewriteValue386_OpRsh8Ux64(v, config)
case OpRsh8Ux8:
return rewriteValue386_OpRsh8Ux8(v, config)
case OpRsh8x16:
return rewriteValue386_OpRsh8x16(v, config)
case OpRsh8x32:
return rewriteValue386_OpRsh8x32(v, config)
+ case OpRsh8x64:
+ return rewriteValue386_OpRsh8x64(v, config)
case OpRsh8x8:
return rewriteValue386_OpRsh8x8(v, config)
case Op386SARB:
@@ -516,6 +530,8 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_OpSignExt8to16(v, config)
case OpSignExt8to32:
return rewriteValue386_OpSignExt8to32(v, config)
+ case OpSignmask:
+ return rewriteValue386_OpSignmask(v, config)
case OpSqrt:
return rewriteValue386_OpSqrt(v, config)
case OpStaticCall:
@@ -562,6 +578,8 @@ func rewriteValue386(v *Value, config *Config) bool {
return rewriteValue386_OpZeroExt8to16(v, config)
case OpZeroExt8to32:
return rewriteValue386_OpZeroExt8to32(v, config)
+ case OpZeromask:
+ return rewriteValue386_OpZeromask(v, config)
}
return false
}
@@ -4062,6 +4080,45 @@ func rewriteValue386_OpLsh16x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpLsh16x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Lsh16x64 x (Const64 [c]))
+ // cond: uint64(c) < 16
+ // result: (SHLLconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 16) {
+ break
+ }
+ v.reset(Op386SHLLconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Lsh16x64 _ (Const64 [c]))
+ // cond: uint64(c) >= 16
+ // result: (Const16 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 16) {
+ break
+ }
+ v.reset(OpConst16)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpLsh16x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -4134,6 +4191,45 @@ func rewriteValue386_OpLsh32x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpLsh32x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Lsh32x64 x (Const64 [c]))
+ // cond: uint64(c) < 32
+ // result: (SHLLconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 32) {
+ break
+ }
+ v.reset(Op386SHLLconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Lsh32x64 _ (Const64 [c]))
+ // cond: uint64(c) >= 32
+ // result: (Const32 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 32) {
+ break
+ }
+ v.reset(OpConst32)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpLsh32x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -4206,6 +4302,45 @@ func rewriteValue386_OpLsh8x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpLsh8x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Lsh8x64 x (Const64 [c]))
+ // cond: uint64(c) < 8
+ // result: (SHLLconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 8) {
+ break
+ }
+ v.reset(Op386SHLLconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Lsh8x64 _ (Const64 [c]))
+ // cond: uint64(c) >= 8
+ // result: (Const8 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 8) {
+ break
+ }
+ v.reset(OpConst8)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpLsh8x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -5997,114 +6132,6 @@ func rewriteValue386_Op386MOVLstoreidx4(v *Value, config *Config) bool {
}
return false
}
-func rewriteValue386_Op386MOVOload(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (MOVOload [off1] {sym} (ADDLconst [off2] ptr) mem)
- // cond: is32Bit(off1+off2)
- // result: (MOVOload [off1+off2] {sym} ptr mem)
- for {
- off1 := v.AuxInt
- sym := v.Aux
- v_0 := v.Args[0]
- if v_0.Op != Op386ADDLconst {
- break
- }
- off2 := v_0.AuxInt
- ptr := v_0.Args[0]
- mem := v.Args[1]
- if !(is32Bit(off1 + off2)) {
- break
- }
- v.reset(Op386MOVOload)
- v.AuxInt = off1 + off2
- v.Aux = sym
- v.AddArg(ptr)
- v.AddArg(mem)
- return true
- }
- // match: (MOVOload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
- // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
- // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
- for {
- off1 := v.AuxInt
- sym1 := v.Aux
- v_0 := v.Args[0]
- if v_0.Op != Op386LEAL {
- break
- }
- off2 := v_0.AuxInt
- sym2 := v_0.Aux
- base := v_0.Args[0]
- mem := v.Args[1]
- if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
- break
- }
- v.reset(Op386MOVOload)
- v.AuxInt = off1 + off2
- v.Aux = mergeSym(sym1, sym2)
- v.AddArg(base)
- v.AddArg(mem)
- return true
- }
- return false
-}
-func rewriteValue386_Op386MOVOstore(v *Value, config *Config) bool {
- b := v.Block
- _ = b
- // match: (MOVOstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
- // cond: is32Bit(off1+off2)
- // result: (MOVOstore [off1+off2] {sym} ptr val mem)
- for {
- off1 := v.AuxInt
- sym := v.Aux
- v_0 := v.Args[0]
- if v_0.Op != Op386ADDLconst {
- break
- }
- off2 := v_0.AuxInt
- ptr := v_0.Args[0]
- val := v.Args[1]
- mem := v.Args[2]
- if !(is32Bit(off1 + off2)) {
- break
- }
- v.reset(Op386MOVOstore)
- v.AuxInt = off1 + off2
- v.Aux = sym
- v.AddArg(ptr)
- v.AddArg(val)
- v.AddArg(mem)
- return true
- }
- // match: (MOVOstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
- // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
- // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
- for {
- off1 := v.AuxInt
- sym1 := v.Aux
- v_0 := v.Args[0]
- if v_0.Op != Op386LEAL {
- break
- }
- off2 := v_0.AuxInt
- sym2 := v_0.Aux
- base := v_0.Args[0]
- val := v.Args[1]
- mem := v.Args[2]
- if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
- break
- }
- v.reset(Op386MOVOstore)
- v.AuxInt = off1 + off2
- v.Aux = mergeSym(sym1, sym2)
- v.AddArg(base)
- v.AddArg(val)
- v.AddArg(mem)
- return true
- }
- return false
-}
func rewriteValue386_Op386MOVSDload(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -9073,26 +9100,6 @@ func rewriteValue386_OpMove(v *Value, config *Config) bool {
return true
}
// match: (Move [s] dst src mem)
- // cond: SizeAndAlign(s).Size() == 16
- // result: (MOVOstore dst (MOVOload src mem) mem)
- for {
- s := v.AuxInt
- dst := v.Args[0]
- src := v.Args[1]
- mem := v.Args[2]
- if !(SizeAndAlign(s).Size() == 16) {
- break
- }
- v.reset(Op386MOVOstore)
- v.AddArg(dst)
- v0 := b.NewValue0(v.Line, Op386MOVOload, TypeInt128)
- v0.AddArg(src)
- v0.AddArg(mem)
- v.AddArg(v0)
- v.AddArg(mem)
- return true
- }
- // match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size() == 3
// result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
for {
@@ -9209,32 +9216,92 @@ func rewriteValue386_OpMove(v *Value, config *Config) bool {
return true
}
// match: (Move [s] dst src mem)
- // cond: SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice
- // result: (DUFFCOPY [14*(64-SizeAndAlign(s).Size()/16)] dst src mem)
+ // cond: SizeAndAlign(s).Size() == 8
+ // result: (MOVLstore [4] dst (MOVLload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
+ for {
+ s := v.AuxInt
+ dst := v.Args[0]
+ src := v.Args[1]
+ mem := v.Args[2]
+ if !(SizeAndAlign(s).Size() == 8) {
+ break
+ }
+ v.reset(Op386MOVLstore)
+ v.AuxInt = 4
+ v.AddArg(dst)
+ v0 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+ v0.AuxInt = 4
+ v0.AddArg(src)
+ v0.AddArg(mem)
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Line, Op386MOVLstore, TypeMem)
+ v1.AddArg(dst)
+ v2 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+ v2.AddArg(src)
+ v2.AddArg(mem)
+ v1.AddArg(v2)
+ v1.AddArg(mem)
+ v.AddArg(v1)
+ return true
+ }
+ // match: (Move [s] dst src mem)
+ // cond: SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0
+ // result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%4] (ADDLconst <dst.Type> dst [SizeAndAlign(s).Size()%4]) (ADDLconst <src.Type> src [SizeAndAlign(s).Size()%4]) (MOVLstore dst (MOVLload src mem) mem))
for {
s := v.AuxInt
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
- if !(SizeAndAlign(s).Size() >= 32 && SizeAndAlign(s).Size() <= 16*64 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice) {
+ if !(SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size()%4 != 0) {
+ break
+ }
+ v.reset(OpMove)
+ v.AuxInt = SizeAndAlign(s).Size() - SizeAndAlign(s).Size()%4
+ v0 := b.NewValue0(v.Line, Op386ADDLconst, dst.Type)
+ v0.AddArg(dst)
+ v0.AuxInt = SizeAndAlign(s).Size() % 4
+ v.AddArg(v0)
+ v1 := b.NewValue0(v.Line, Op386ADDLconst, src.Type)
+ v1.AddArg(src)
+ v1.AuxInt = SizeAndAlign(s).Size() % 4
+ v.AddArg(v1)
+ v2 := b.NewValue0(v.Line, Op386MOVLstore, TypeMem)
+ v2.AddArg(dst)
+ v3 := b.NewValue0(v.Line, Op386MOVLload, config.fe.TypeUInt32())
+ v3.AddArg(src)
+ v3.AddArg(mem)
+ v2.AddArg(v3)
+ v2.AddArg(mem)
+ v.AddArg(v2)
+ return true
+ }
+ // match: (Move [s] dst src mem)
+ // cond: SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice
+ // result: (DUFFCOPY [10*(128-SizeAndAlign(s).Size()/4)] dst src mem)
+ for {
+ s := v.AuxInt
+ dst := v.Args[0]
+ src := v.Args[1]
+ mem := v.Args[2]
+ if !(SizeAndAlign(s).Size() > 8 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice) {
break
}
v.reset(Op386DUFFCOPY)
- v.AuxInt = 14 * (64 - SizeAndAlign(s).Size()/16)
+ v.AuxInt = 10 * (128 - SizeAndAlign(s).Size()/4)
v.AddArg(dst)
v.AddArg(src)
v.AddArg(mem)
return true
}
// match: (Move [s] dst src mem)
- // cond: (SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0
+ // cond: (SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0
// result: (REPMOVSL dst src (MOVLconst [SizeAndAlign(s).Size()/4]) mem)
for {
s := v.AuxInt
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
- if !((SizeAndAlign(s).Size() > 16*64 || config.noDuffDevice) && SizeAndAlign(s).Size()%8 == 0) {
+ if !((SizeAndAlign(s).Size() > 4*128 || config.noDuffDevice) && SizeAndAlign(s).Size()%4 == 0) {
break
}
v.reset(Op386REPMOVSL)
@@ -10006,32 +10073,16 @@ func rewriteValue386_OpOffPtr(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (OffPtr [off] ptr)
- // cond: is32Bit(off)
+ // cond:
// result: (ADDLconst [off] ptr)
for {
off := v.AuxInt
ptr := v.Args[0]
- if !(is32Bit(off)) {
- break
- }
v.reset(Op386ADDLconst)
v.AuxInt = off
v.AddArg(ptr)
return true
}
- // match: (OffPtr [off] ptr)
- // cond:
- // result: (ADDL (MOVLconst [off]) ptr)
- for {
- off := v.AuxInt
- ptr := v.Args[0]
- v.reset(Op386ADDL)
- v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
- v0.AuxInt = off
- v.AddArg(v0)
- v.AddArg(ptr)
- return true
- }
}
func rewriteValue386_OpOr16(v *Value, config *Config) bool {
b := v.Block
@@ -10243,6 +10294,45 @@ func rewriteValue386_OpRsh16Ux32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh16Ux64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh16Ux64 x (Const64 [c]))
+ // cond: uint64(c) < 16
+ // result: (SHRWconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 16) {
+ break
+ }
+ v.reset(Op386SHRWconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Rsh16Ux64 _ (Const64 [c]))
+ // cond: uint64(c) >= 16
+ // result: (Const16 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 16) {
+ break
+ }
+ v.reset(OpConst16)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh16Ux8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -10321,6 +10411,29 @@ func rewriteValue386_OpRsh16x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh16x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh16x64 x (Const64 [c]))
+ // cond: uint64(c) < 16
+ // result: (SARWconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 16) {
+ break
+ }
+ v.reset(Op386SARWconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh16x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -10396,6 +10509,45 @@ func rewriteValue386_OpRsh32Ux32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh32Ux64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh32Ux64 x (Const64 [c]))
+ // cond: uint64(c) < 32
+ // result: (SHRLconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 32) {
+ break
+ }
+ v.reset(Op386SHRLconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Rsh32Ux64 _ (Const64 [c]))
+ // cond: uint64(c) >= 32
+ // result: (Const32 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 32) {
+ break
+ }
+ v.reset(OpConst32)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh32Ux8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -10474,6 +10626,29 @@ func rewriteValue386_OpRsh32x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh32x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh32x64 x (Const64 [c]))
+ // cond: uint64(c) < 32
+ // result: (SARLconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 32) {
+ break
+ }
+ v.reset(Op386SARLconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh32x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -10549,6 +10724,45 @@ func rewriteValue386_OpRsh8Ux32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh8Ux64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh8Ux64 x (Const64 [c]))
+ // cond: uint64(c) < 8
+ // result: (SHRBconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 8) {
+ break
+ }
+ v.reset(Op386SHRBconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ // match: (Rsh8Ux64 _ (Const64 [c]))
+ // cond: uint64(c) >= 8
+ // result: (Const8 [0])
+ for {
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) >= 8) {
+ break
+ }
+ v.reset(OpConst8)
+ v.AuxInt = 0
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh8Ux8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -10627,6 +10841,29 @@ func rewriteValue386_OpRsh8x32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpRsh8x64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Rsh8x64 x (Const64 [c]))
+ // cond: uint64(c) < 8
+ // result: (SARBconst x [c])
+ for {
+ x := v.Args[0]
+ v_1 := v.Args[1]
+ if v_1.Op != OpConst64 {
+ break
+ }
+ c := v_1.AuxInt
+ if !(uint64(c) < 8) {
+ break
+ }
+ v.reset(Op386SARBconst)
+ v.AddArg(x)
+ v.AuxInt = c
+ return true
+ }
+ return false
+}
func rewriteValue386_OpRsh8x8(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -12014,6 +12251,20 @@ func rewriteValue386_OpSignExt8to32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpSignmask(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Signmask x)
+ // cond:
+ // result: (SARLconst x [31])
+ for {
+ x := v.Args[0]
+ v.reset(Op386SARLconst)
+ v.AddArg(x)
+ v.AuxInt = 31
+ return true
+ }
+}
func rewriteValue386_OpSqrt(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -12681,19 +12932,38 @@ func rewriteValue386_OpZero(v *Value, config *Config) bool {
return true
}
// match: (Zero [s] destptr mem)
- // cond: (SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32)) && SizeAndAlign(s).Size()%8 == 0
- // result: (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/8]) (MOVLconst [0]) mem)
+ // cond: SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice
+ // result: (DUFFZERO [1*(128-SizeAndAlign(s).Size()/4)] destptr (MOVLconst [0]) mem)
+ for {
+ s := v.AuxInt
+ destptr := v.Args[0]
+ mem := v.Args[1]
+ if !(SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size() <= 4*128 && SizeAndAlign(s).Size()%4 == 0 && !config.noDuffDevice) {
+ break
+ }
+ v.reset(Op386DUFFZERO)
+ v.AuxInt = 1 * (128 - SizeAndAlign(s).Size()/4)
+ v.AddArg(destptr)
+ v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
+ v0.AuxInt = 0
+ v.AddArg(v0)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (Zero [s] destptr mem)
+ // cond: (SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16)) && SizeAndAlign(s).Size()%4 == 0
+ // result: (REPSTOSL destptr (MOVLconst [SizeAndAlign(s).Size()/4]) (MOVLconst [0]) mem)
for {
s := v.AuxInt
destptr := v.Args[0]
mem := v.Args[1]
- if !((SizeAndAlign(s).Size() > 1024 || (config.noDuffDevice && SizeAndAlign(s).Size() > 32)) && SizeAndAlign(s).Size()%8 == 0) {
+ if !((SizeAndAlign(s).Size() > 4*128 || (config.noDuffDevice && SizeAndAlign(s).Size() > 16)) && SizeAndAlign(s).Size()%4 == 0) {
break
}
v.reset(Op386REPSTOSL)
v.AddArg(destptr)
v0 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
- v0.AuxInt = SizeAndAlign(s).Size() / 8
+ v0.AuxInt = SizeAndAlign(s).Size() / 4
v.AddArg(v0)
v1 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
v1.AuxInt = 0
@@ -12742,6 +13012,24 @@ func rewriteValue386_OpZeroExt8to32(v *Value, config *Config) bool {
return true
}
}
+func rewriteValue386_OpZeromask(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Zeromask x)
+ // cond:
+ // result: (SBBLcarrymask (CMPL (MOVLconst [0]) x))
+ for {
+ x := v.Args[0]
+ v.reset(Op386SBBLcarrymask)
+ v0 := b.NewValue0(v.Line, Op386CMPL, TypeFlags)
+ v1 := b.NewValue0(v.Line, Op386MOVLconst, config.fe.TypeUInt32())
+ v1.AuxInt = 0
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteBlock386(b *Block) bool {
switch b.Kind {
case Block386EQ:
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 9888d065cd..01c268f70b 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -17415,7 +17415,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool {
}
// match: (Zero [s] destptr mem)
// cond: SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice
- // result: (DUFFZERO [duffStart(SizeAndAlign(s).Size())] (ADDQconst [duffAdj(SizeAndAlign(s).Size())] destptr) (MOVOconst [0]) mem)
+ // result: (DUFFZERO [duffStartAMD64(SizeAndAlign(s).Size())] (ADDQconst [duffAdjAMD64(SizeAndAlign(s).Size())] destptr) (MOVOconst [0]) mem)
for {
s := v.AuxInt
destptr := v.Args[0]
@@ -17424,9 +17424,9 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool {
break
}
v.reset(OpAMD64DUFFZERO)
- v.AuxInt = duffStart(SizeAndAlign(s).Size())
+ v.AuxInt = duffStartAMD64(SizeAndAlign(s).Size())
v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64())
- v0.AuxInt = duffAdj(SizeAndAlign(s).Size())
+ v0.AuxInt = duffAdjAMD64(SizeAndAlign(s).Size())
v0.AddArg(destptr)
v.AddArg(v0)
v1 := b.NewValue0(v.Line, OpAMD64MOVOconst, TypeInt128)
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go
index ab6410b1c3..f83afa1a58 100644
--- a/src/cmd/compile/internal/x86/ssa.go
+++ b/src/cmd/compile/internal/x86/ssa.go
@@ -101,11 +101,14 @@ func storeByType(t ssa.Type) obj.As {
// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t ssa.Type) obj.As {
if t.IsFloat() {
- // Moving the whole sse2 register is faster
- // than moving just the correct low portion of it.
- // There is no xmm->xmm move with 1 byte opcode,
- // so use movups, which has 2 byte opcode.
- return x86.AMOVUPS
+ switch t.Size() {
+ case 4:
+ return x86.AMOVSS
+ case 8:
+ return x86.AMOVSD
+ default:
+ panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
+ }
} else {
switch t.Size() {
case 1:
@@ -115,8 +118,6 @@ func moveByType(t ssa.Type) obj.As {
return x86.AMOVL
case 4:
return x86.AMOVL
- case 16:
- return x86.AMOVUPS // int128s are in SSE registers
default:
panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
}
@@ -448,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG
p.To.Reg = x
- case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload, ssa.Op386MOVOload:
+ case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = gc.SSARegNum(v.Args[0])
@@ -496,7 +497,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
- case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore, ssa.Op386MOVOstore:
+ case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = gc.SSARegNum(v.Args[1])
@@ -584,12 +585,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_ADDR
p.To.Sym = gc.Linksym(gc.Pkglookup("duffzero", gc.Runtimepkg))
p.To.Offset = v.AuxInt
- case ssa.Op386MOVOconst:
- if v.AuxInt != 0 {
- v.Unimplementedf("MOVOconst can only do constant=0")
- }
- r := gc.SSARegNum(v)
- opregreg(x86.AXORPS, r, r)
case ssa.Op386DUFFCOPY:
p := gc.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_ADDR
@@ -828,8 +823,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload,
ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload,
- ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVOload,
- ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVOstore:
+ ssa.Op386MOVSSload, ssa.Op386MOVSDload,
+ ssa.Op386MOVSSstore, ssa.Op386MOVSDstore:
if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
if gc.Debug_checknil != 0 && int(v.Line) > 1 {
gc.Warnl(v.Line, "removed nil check")