aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/gen
diff options
context:
space:
mode:
author Alejandro García Montoro <alejandro.garciamontoro@gmail.com> 2020-12-30 18:41:36 +0100
committerJosh Bleecher Snyder <josharian@gmail.com>2021-02-24 19:25:49 +0000
commitbf48163e8f2b604f3b9e83951e331cd11edd8495 (patch)
treef331bc8eb53e8fd69fb722384958cbff86695ab0 /src/cmd/compile/internal/ssa/gen
parentb7f62daa59ea5983d5825e166abc527d4ea69777 (diff)
downloadgo-bf48163e8f2b604f3b9e83951e331cd11edd8495.tar.gz
go-bf48163e8f2b604f3b9e83951e331cd11edd8495.zip
cmd/compile: add rule to coalesce writes
The code generated when storing eight bytes loaded from memory created a series of small writes instead of a single, large one. The specific pattern of instructions generated stored 1 byte, then 2 bytes, then 4 bytes, and finally 1 byte.

The new rules match this specific pattern both for amd64 and for s390x, and convert it into a single instruction to store the 8 bytes. arm64 and ppc64le already generated the right code, but the new codegen test covers also those architectures.

Fixes #41663

Change-Id: Ifb9b464be2d59c2ed5034acf7b9c3e473f344030
Reviewed-on: https://go-review.googlesource.com/c/go/+/280456
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Jason A. Donenfeld <Jason@zx2c4.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen')
-rw-r--r--src/cmd/compile/internal/ssa/gen/AMD64.rules10
-rw-r--r--src/cmd/compile/internal/ssa/gen/S390X.rules10
2 files changed, 20 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 01a8a16456..f2bcbd2dfc 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1969,6 +1969,16 @@
&& clobber(x)
=> (MOVQstore [i] {s} p0 w0 mem)
+(MOVBstore [7] {s} p1 (SHRQconst [56] w)
+ x1:(MOVWstore [5] {s} p1 (SHRQconst [40] w)
+ x2:(MOVLstore [1] {s} p1 (SHRQconst [8] w)
+ x3:(MOVBstore [0] {s} p1 w mem))))
+ && x1.Uses == 1
+ && x2.Uses == 1
+ && x3.Uses == 1
+ && clobber(x1, x2, x3)
+ => (MOVQstore {s} p1 w mem)
+
(MOVBstore [i] {s} p
x1:(MOVBload [j] {s2} p2 mem)
mem2:(MOVBstore [i-1] {s} p
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index c3421da0a2..7111d5e11a 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -1420,6 +1420,16 @@
&& clobber(x)
=> (MOVDBRstore [i-4] {s} p w0 mem)
+(MOVBstore [7] {s} p1 (SRDconst [56] w)
+ x1:(MOVHBRstore [5] {s} p1 (SRDconst [40] w)
+ x2:(MOVWBRstore [1] {s} p1 (SRDconst [8] w)
+ x3:(MOVBstore [0] {s} p1 w mem))))
+ && x1.Uses == 1
+ && x2.Uses == 1
+ && x3.Uses == 1
+ && clobber(x1, x2, x3)
+ => (MOVDBRstore {s} p1 w mem)
+
// Combining byte loads into larger (unaligned) loads.
// Big-endian loads