aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/rewriteARM64.go
diff options
context:
space:
mode:
authorJonathan Swinney <jswinney@amazon.com>2020-11-04 16:18:23 +0000
committerCherry Zhang <cherryyz@google.com>2020-11-05 23:21:33 +0000
commitecc3f5112ebaf23c4b1ac4c5eedfa406d82ecc9a (patch)
treef73baf58f3df5fcdcec20b65e3223260f14472ea /src/cmd/compile/internal/ssa/rewriteARM64.go
parent8e5778ed70ec3d371615a663520a586745fb7bee (diff)
downloadgo-ecc3f5112ebaf23c4b1ac4c5eedfa406d82ecc9a.tar.gz
go-ecc3f5112ebaf23c4b1ac4c5eedfa406d82ecc9a.zip
cmd/compile: improve atomic swap intrinsics on arm64
ARMv8.1 has added new instructions for atomic memory operations. This change builds on the previous change which added support for atomic add, 0a7ac93c27c9ade79fe0f66ae0bb81484c241ae5, to include similar support for atomic-compare-and-swap, atomic-swap, atomic-or, and atomic-and intrinsics. Since the new instructions are not guaranteed to be present, we guard their usages with a branch on a CPU feature. Performance on an ARMv8.1 machine: name old time/op new time/op delta CompareAndSwap-16 37.9ns ±16% 24.1ns ± 4% -36.44% (p=0.000 n=10+9) CompareAndSwap64-16 38.6ns ±15% 24.1ns ± 3% -37.47% (p=0.000 n=10+10) name old time/op new time/op delta Swap-16 46.9ns ±32% 12.5ns ± 6% -73.40% (p=0.000 n=10+10) Swap64-16 53.4ns ± 1% 12.5ns ± 6% -76.56% (p=0.000 n=10+10) name old time/op new time/op delta Or8-16 8.81ns ± 0% 5.61ns ± 0% -36.32% (p=0.000 n=10+10) Or-16 7.21ns ± 0% 5.61ns ± 0% -22.19% (p=0.000 n=10+10) Or8Parallel-16 59.8ns ± 3% 12.5ns ± 2% -79.10% (p=0.000 n=10+10) OrParallel-16 51.7ns ± 3% 12.5ns ± 2% -75.84% (p=0.000 n=10+10) name old time/op new time/op delta And8-16 8.81ns ± 0% 5.61ns ± 0% -36.32% (p=0.000 n=10+10) And-16 7.21ns ± 0% 5.61ns ± 0% -22.19% (p=0.000 n=10+10) And8Parallel-16 59.1ns ± 6% 12.8ns ± 3% -78.33% (p=0.000 n=10+10) AndParallel-16 51.4ns ± 7% 12.8ns ± 3% -75.03% (p=0.000 n=10+10) Performance on an ARMv8.0 machine (no atomics instructions): name old time/op new time/op delta CompareAndSwap-16 61.3ns ± 0% 62.4ns ± 0% +1.70% (p=0.000 n=8+9) CompareAndSwap64-16 62.0ns ± 3% 61.3ns ± 2% ~ (p=0.093 n=10+10) name old time/op new time/op delta Swap-16 127ns ± 2% 131ns ± 2% +2.91% (p=0.001 n=10+10) Swap64-16 128ns ± 1% 131ns ± 2% +2.43% (p=0.001 n=10+10) name old time/op new time/op delta Or8-16 14.9ns ± 0% 15.3ns ± 0% +2.68% (p=0.000 n=10+10) Or-16 11.8ns ± 0% 12.3ns ± 0% +4.24% (p=0.000 n=10+10) Or8Parallel-16 137ns ± 1% 144ns ± 1% +4.97% (p=0.000 n=10+10) OrParallel-16 128ns ± 1% 136ns ± 1% +6.34% (p=0.000 n=10+10) name old time/op new time/op delta 
And8-16 14.9ns ± 0% 15.3ns ± 0% +2.68% (p=0.000 n=10+10) And-16 11.8ns ± 0% 12.3ns ± 0% +4.24% (p=0.000 n=10+10) And8Parallel-16 134ns ± 2% 141ns ± 1% +5.29% (p=0.000 n=10+10) AndParallel-16 125ns ± 2% 134ns ± 1% +7.10% (p=0.000 n=10+10) Fixes #39304 Change-Id: Idaca68701d4751650be6b4bedca3d57f51571712 Reviewed-on: https://go-review.googlesource.com/c/go/+/234217 Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com> Trust: fannie zhang <Fannie.Zhang@arm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewriteARM64.go')
-rw-r--r--src/cmd/compile/internal/ssa/rewriteARM64.go96
1 files changed, 96 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 9a5e976dea..353696bf39 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -426,20 +426,36 @@ func rewriteValueARM64(v *Value) bool {
return true
case OpAtomicAnd32:
return rewriteValueARM64_OpAtomicAnd32(v)
+ case OpAtomicAnd32Variant:
+ return rewriteValueARM64_OpAtomicAnd32Variant(v)
case OpAtomicAnd8:
return rewriteValueARM64_OpAtomicAnd8(v)
+ case OpAtomicAnd8Variant:
+ return rewriteValueARM64_OpAtomicAnd8Variant(v)
case OpAtomicCompareAndSwap32:
v.Op = OpARM64LoweredAtomicCas32
return true
+ case OpAtomicCompareAndSwap32Variant:
+ v.Op = OpARM64LoweredAtomicCas32Variant
+ return true
case OpAtomicCompareAndSwap64:
v.Op = OpARM64LoweredAtomicCas64
return true
+ case OpAtomicCompareAndSwap64Variant:
+ v.Op = OpARM64LoweredAtomicCas64Variant
+ return true
case OpAtomicExchange32:
v.Op = OpARM64LoweredAtomicExchange32
return true
+ case OpAtomicExchange32Variant:
+ v.Op = OpARM64LoweredAtomicExchange32Variant
+ return true
case OpAtomicExchange64:
v.Op = OpARM64LoweredAtomicExchange64
return true
+ case OpAtomicExchange64Variant:
+ v.Op = OpARM64LoweredAtomicExchange64Variant
+ return true
case OpAtomicLoad32:
v.Op = OpARM64LDARW
return true
@@ -454,8 +470,12 @@ func rewriteValueARM64(v *Value) bool {
return true
case OpAtomicOr32:
return rewriteValueARM64_OpAtomicOr32(v)
+ case OpAtomicOr32Variant:
+ return rewriteValueARM64_OpAtomicOr32Variant(v)
case OpAtomicOr8:
return rewriteValueARM64_OpAtomicOr8(v)
+ case OpAtomicOr8Variant:
+ return rewriteValueARM64_OpAtomicOr8Variant(v)
case OpAtomicStore32:
v.Op = OpARM64STLRW
return true
@@ -21363,6 +21383,25 @@ func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd32Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -21382,6 +21421,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicAnd8Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -21401,6 +21459,25 @@ func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr32Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -21420,6 +21497,25 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
return true
}
}
+func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (AtomicOr8Variant ptr val mem)
+ // result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
+ for {
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ v.reset(OpSelect1)
+ v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+ v0.AddArg3(ptr, val, mem)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueARM64_OpAvg64u(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]