cmd/compile: add rule to coalesce writes

The code generated when storing eight bytes loaded from memory created a series of small writes instead of a single, large one. The specific pattern of instructions generated stored 1 byte, then 2 bytes, then 4 bytes, and finally 1 byte.

The new rules match this specific pattern for both amd64 and s390x, and convert it into a single instruction that stores all 8 bytes. arm64 and ppc64le already generated the right code, but the new codegen test also covers those architectures.

Fixes #41663

Change-Id: Ifb9b464be2d59c2ed5034acf7b9c3e473f344030
Reviewed-on: https://go-review.googlesource.com/c/go/+/280456
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Trust: Jason A. Donenfeld <Jason@zx2c4.com>
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
author: Alejandro García Montoro <alejandro.garciamontoro@gmail.com> 2020-12-30 18:41:36 +0100
committer: Josh Bleecher Snyder <josharian@gmail.com> 2021-02-24 19:25:49 +0000
commit: bf48163e8f2b604f3b9e83951e331cd11edd8495 (patch)
tree: f331bc8eb53e8fd69fb722384958cbff86695ab0 /src/cmd/compile/internal/ssa/gen
parent: b7f62daa59ea5983d5825e166abc527d4ea69777 (diff)
download: go-bf48163e8f2b604f3b9e83951e331cd11edd8495.tar.gz
go-bf48163e8f2b604f3b9e83951e331cd11edd8495.zip
2 files changed, 20 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 01a8a16456..f2bcbd2dfc 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1969,6 +1969,16 @@
   && clobber(x)
   => (MOVQstore [i] {s} p0 w0 mem)
 
+(MOVBstore [7] {s} p1 (SHRQconst [56] w) // 1+2+4+1 byte stores of one 64-bit w at offsets 7,5,1,0 off the same {s}+p1
+  x1:(MOVWstore [5] {s} p1 (SHRQconst [40] w)
+  x2:(MOVLstore [1] {s} p1 (SHRQconst [8] w)
+  x3:(MOVBstore [0] {s} p1 w mem)))) // offset and symbol must be spelled out: omitted aux fields match anything
+  && x1.Uses == 1
+  && x2.Uses == 1
+  && x3.Uses == 1
+  && clobber(x1, x2, x3)
+  => (MOVQstore {s} p1 w mem) // single 8-byte store; {s} is carried over so the symbol is not dropped
+
 (MOVBstore [i] {s} p
   x1:(MOVBload [j] {s2} p2 mem)
     mem2:(MOVBstore [i-1] {s} p
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index c3421da0a2..7111d5e11a 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -1420,6 +1420,16 @@
   && clobber(x)
   => (MOVDBRstore [i-4] {s} p w0 mem)
 
+(MOVBstore [7] {s} p1 (SRDconst [56] w) // 1+2+4+1 byte-reversed stores of one 64-bit w at offsets 7,5,1,0 off the same {s}+p1
+  x1:(MOVHBRstore [5] {s} p1 (SRDconst [40] w)
+  x2:(MOVWBRstore [1] {s} p1 (SRDconst [8] w) // shift amounts are pinned: an omitted auxint would match any shift
+  x3:(MOVBstore [0] {s} p1 w mem)))) // offset and symbol must be spelled out: omitted aux fields match anything
+  && x1.Uses == 1
+  && x2.Uses == 1
+  && x3.Uses == 1
+  && clobber(x1, x2, x3)
+  => (MOVDBRstore {s} p1 w mem) // single byte-reversed 8-byte store; {s} is carried over so the symbol is not dropped
+
 // Combining byte loads into larger (unaligned) loads.
 
 // Big-endian loads
author	Alejandro García Montoro <alejandro.garciamontoro@gmail.com>	2020-12-30 18:41:36 +0100
committer	Josh Bleecher Snyder <josharian@gmail.com>	2021-02-24 19:25:49 +0000
commit	bf48163e8f2b604f3b9e83951e331cd11edd8495 (patch)
tree	f331bc8eb53e8fd69fb722384958cbff86695ab0 /src/cmd/compile/internal/ssa/gen
parent	b7f62daa59ea5983d5825e166abc527d4ea69777 (diff)
download	go-bf48163e8f2b604f3b9e83951e331cd11edd8495.tar.gz go-bf48163e8f2b604f3b9e83951e331cd11edd8495.zip