author     Pat Gavlin <pgavlin@gmail.com>      2021-03-26 14:46:37 +0000
committer  David Chase <drchase@google.com>    2021-03-31 17:15:59 +0000
commit     96139f25993f3d8122e27a6fec877a4d4f69f83b (patch)
tree       7017ee1f9b9eae9cb8af38a2e6cea121bfb4a8b1
parent     887c0d890fd33a72cc1de6316252ba37649145cb (diff)
[release-branch.go1.16] cmd/compile: fix long RMW bit operations on AMD64
Under certain circumstances, the existing rules for bit operations can
produce code that writes beyond its intended bounds. For example,
consider the following code:

    func repro(b []byte, addr, bit int32) {
        _ = b[3]
        v := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | 1<<(bit&31)
        b[0] = byte(v)
        b[1] = byte(v >> 8)
        b[2] = byte(v >> 16)
        b[3] = byte(v >> 24)
    }

Roughly speaking:

1. The expression `1 << (bit & 31)` is rewritten into `(SHLL 1 bit)`.
2. The expression `uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 |
   uint32(b[3])<<24` is rewritten into `(MOVLload &b[0])`.
3. The statements `b[0] = byte(v) ... b[3] = byte(v >> 24)` are
   rewritten into `(MOVLstore &b[0], v)`.
4. `(ORL (SHLL 1, bit) (MOVLload &b[0]))` is rewritten into
   `(BTSL (MOVLload &b[0]) bit)`. This is a valid transformation because
   the destination is a register: in this case, the bit offset is masked
   by the number of bits in the destination register. This is identical
   to the masking performed by `SHL`.
5. `(MOVLstore &b[0] (BTSL (MOVLload &b[0]) bit))` is rewritten into
   `(BTSLmodify &b[0] bit)`. This is an invalid transformation because
   the destination is memory: in this case, the bit offset is not
   masked, and the chosen instruction may write outside its intended
   32-bit location.

These changes fix the invalid rewrite performed in step (5) by
explicitly masking the bit offset operand to `BT(S|R|C)(L|Q)modify`. In
the example above, the adjusted rules produce
`(BTSLmodify &b[0] (ANDLconst [31] bit))` in step (5).

These changes also add several new rules to rewrite bit sets, toggles,
and clears that are rooted at `(OR|XOR|AND)(L|Q)modify` operators into
appropriate `BT(S|R|C)(L|Q)modify` operators. These rules catch cases
where `MOV(L|Q)store ((OR|XOR|AND)(L|Q) ...)` is rewritten to
`(OR|XOR|AND)(L|Q)modify` before the `(OR|XOR|AND)(L|Q) ...` can be
rewritten to `BT(S|R|C)(L|Q) ...`.

Overall, compilecmp reports small improvements in code size on
darwin/amd64 when the changes to the compiler itself are excluded:

    file                                before    after     Δ      %
    runtime.s                           536464    536412    -52    -0.010%
    bytes.s                             32629     32593     -36    -0.110%
    strings.s                           44565     44529     -36    -0.081%
    os/signal.s                         7967      7959      -8     -0.100%
    cmd/vendor/golang.org/x/sys/unix.s  81686     81678     -8     -0.010%
    math/big.s                          188235    188253    +18    +0.010%
    cmd/link/internal/loader.s          89295     89056     -239   -0.268%
    cmd/link/internal/ld.s              633551    633232    -319   -0.050%
    cmd/link/internal/arm.s             18934     18928     -6     -0.032%
    cmd/link/internal/arm64.s           31814     31801     -13    -0.041%
    cmd/link/internal/riscv64.s         7347      7345      -2     -0.027%
    cmd/compile/internal/ssa.s          4029173   4033066   +3893  +0.097%
    total                               21298280  21301472  +3192  +0.015%

Fixes #45253

Change-Id: I2e560548b515865129e1724e150e30540e9d29ce
GitHub-Last-Rev: ab94ede1d097f920a9d1d3da403c8e4a3d8f6d44
GitHub-Pull-Request: golang/go#45242
Reviewed-on: https://go-review.googlesource.com/c/go/+/305069
Trust: Emmanuel Odeke <emmanuel@orijtech.com>
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
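A minimal standalone harness (a sketch, not part of the commit) built around
the repro function above; the guard bytes past b[3] make an unmasked
BTSLmodify visible, since a bit offset of 32 would then land in b[4] rather
than wrapping back to bit 0 of b[0]:

    package main

    import "fmt"

    // repro is the function from the commit message: it sets bit (bit & 31)
    // of the 32-bit little-endian value stored in b[0:4].
    func repro(b []byte, addr, bit int32) {
        _ = b[3]
        v := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | 1<<(bit&31)
        b[0] = byte(v)
        b[1] = byte(v >> 8)
        b[2] = byte(v >> 16)
        b[3] = byte(v >> 24)
    }

    func main() {
        b := make([]byte, 8) // b[4:] act as guard bytes
        repro(b, 0, 32)      // bit&31 == 0, so only bit 0 of b[0] should be set
        fmt.Println(b)       // want [1 0 0 0 0 0 0 0]; an unmasked BTSLmodify would instead set bit 0 of b[4]
    }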
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules    24
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64Ops.go     5
-rw-r--r--  src/cmd/compile/internal/ssa/rewriteAMD64.go   229
-rw-r--r--  test/codegen/bits.go                            10
4 files changed, 244 insertions, 24 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index a866a967b9..5de1e1ec31 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -623,6 +623,14 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
+(ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
+ (BTSLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+(ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
+ (BTSQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
+(XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
+ (BTCLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+(XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
+ (BTCQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@@ -645,6 +653,10 @@
=> (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
+(ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) <t> x)) mem) =>
+ (BTRLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+(ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) <t> x)) mem) =>
+ (BTRQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@@ -2050,11 +2062,15 @@
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+ ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+ ((BTC|BTR|BTS)Lmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+ ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+ ((BTC|BTR|BTS)Qmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index de5372670b..a87581b68f 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -358,6 +358,11 @@ func init() {
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
// direct bit operation on memory operand
+ //
+ // Note that these operations do not mask the bit offset (arg1), and will write beyond their expected
+ // bounds if that argument is larger than 64/32 (for BT*Q and BT*L, respectively). If the compiler
+ // cannot prove that arg1 is in range, it must be explicitly masked (see e.g. the patterns that produce
+ // BT*modify from (MOVstore (BT* (MOVLload ptr mem) x) mem)).
{name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 75d4ff7357..8272a406d9 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -2998,6 +2998,36 @@ func rewriteValueAMD64_OpAMD64ANDLmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) <t> x)) mem)
+ // result: (BTRLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ if v_1.Op != OpAMD64NOTL {
+ break
+ }
+ s := v_1.Args[0]
+ if s.Op != OpAMD64SHLL {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTRLmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (ANDLmodify [off1+off2] {sym} base val mem)
@@ -3377,6 +3407,36 @@ func rewriteValueAMD64_OpAMD64ANDQmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) <t> x)) mem)
+ // result: (BTRQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ if v_1.Op != OpAMD64NOTQ {
+ break
+ }
+ s := v_1.Args[0]
+ if s.Op != OpAMD64SHLQ {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTRQmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (ANDQmodify [off1+off2] {sym} base val mem)
@@ -12709,9 +12769,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
}
break
}
- // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTCLmodify [off] {sym} ptr x mem)
+ // result: (BTCLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -12720,6 +12780,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
if y.Op != OpAMD64BTCL {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -12732,12 +12793,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
v.reset(OpAMD64BTCLmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
- // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTRLmodify [off] {sym} ptr x mem)
+ // result: (BTRLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -12746,6 +12810,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
if y.Op != OpAMD64BTRL {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -12758,12 +12823,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
v.reset(OpAMD64BTRLmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
- // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTSLmodify [off] {sym} ptr x mem)
+ // result: (BTSLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -12772,6 +12840,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
if y.Op != OpAMD64BTSL {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -12784,7 +12853,10 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
v.reset(OpAMD64BTSLmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
@@ -13525,6 +13597,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
// match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (MOVQstore [off1+off2] {sym} ptr val mem)
@@ -13890,9 +13963,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
}
break
}
- // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTCQmodify [off] {sym} ptr x mem)
+ // result: (BTCQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -13901,6 +13974,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
if y.Op != OpAMD64BTCQ {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -13913,12 +13987,15 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v.reset(OpAMD64BTCQmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
- // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTRQmodify [off] {sym} ptr x mem)
+ // result: (BTRQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -13927,6 +14004,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
if y.Op != OpAMD64BTRQ {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -13939,12 +14017,15 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v.reset(OpAMD64BTRQmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
- // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) <t> x) mem)
// cond: y.Uses==1 && l.Uses==1 && clobber(y, l)
- // result: (BTSQmodify [off] {sym} ptr x mem)
+ // result: (BTSQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@@ -13953,6 +14034,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
if y.Op != OpAMD64BTSQ {
break
}
+ t := y.Type
x := y.Args[1]
l := y.Args[0]
if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym {
@@ -13965,7 +14047,10 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v.reset(OpAMD64BTSQmodify)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
- v.AddArg3(ptr, x, mem)
+ v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
@@ -18352,6 +18437,33 @@ func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem)
+ // result: (BTSLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ s := v_1
+ if s.Op != OpAMD64SHLL {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTSLmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (ORLmodify [off1+off2] {sym} base val mem)
@@ -19979,6 +20091,33 @@ func rewriteValueAMD64_OpAMD64ORQmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem)
+ // result: (BTSQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ s := v_1
+ if s.Op != OpAMD64SHLQ {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTSQmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (ORQmodify [off1+off2] {sym} base val mem)
@@ -28014,6 +28153,33 @@ func rewriteValueAMD64_OpAMD64XORLmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem)
+ // result: (BTCLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ s := v_1
+ if s.Op != OpAMD64SHLL {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTCLmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t)
+ v0.AuxInt = int32ToAuxInt(31)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (XORLmodify [off1+off2] {sym} base val mem)
@@ -28382,6 +28548,33 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ // match: (XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem)
+ // result: (BTCQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ ptr := v_0
+ s := v_1
+ if s.Op != OpAMD64SHLQ {
+ break
+ }
+ t := s.Type
+ x := s.Args[1]
+ s_0 := s.Args[0]
+ if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 {
+ break
+ }
+ mem := v_2
+ v.reset(OpAMD64BTCQmodify)
+ v.AuxInt = int32ToAuxInt(off)
+ v.Aux = symToAux(sym)
+ v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t)
+ v0.AuxInt = int32ToAuxInt(63)
+ v0.AddArg(x)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
// match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
// cond: is32Bit(int64(off1)+int64(off2))
// result: (XORQmodify [off1+off2] {sym} base val mem)
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 4508eba487..6f7502292a 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -262,8 +262,8 @@ func bitcompl32(a, b uint32) (n uint32) {
return n
}
-// check direct operation on memory with constant source
-func bitOpOnMem(a []uint32) {
+// check direct operation on memory with constant and shifted constant sources
+func bitOpOnMem(a []uint32, b, c, d uint32) {
// amd64:`ANDL\s[$]200,\s\([A-Z]+\)`
a[0] &= 200
// amd64:`ORL\s[$]220,\s4\([A-Z]+\)`
@@ -276,6 +276,12 @@ func bitOpOnMem(a []uint32) {
a[4] |= 0x4000
// amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL`
a[5] ^= 0x2000
+ // amd64:`BTRL\s[A-Z]+,\s24\([A-Z]+\)`
+ a[6] &^= 1 << (b & 31)
+ // amd64:`BTSL\s[A-Z]+,\s28\([A-Z]+\)`
+ a[7] |= 1 << (c & 31)
+ // amd64:`BTCL\s[A-Z]+,\s32\([A-Z]+\)`
+ a[8] ^= 1 << (d & 31)
}
func bitcheckMostNegative(b uint8) bool {