From 8c840b10d0c34091349fc224756924fea9c54c9b Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Thu, 30 Nov 2023 10:04:16 -0800 Subject: [release-branch.go1.21] cmd/compile: fix memcombine pass for big endian, > 1 byte elements The shift amounts were wrong in this case, leading to miscompilation of load combining. Also the store combining was not triggering when it should. Fixes #64472 Change-Id: Iaeb08972c5fc1d6f628800334789c6af7216e87b Reviewed-on: https://go-review.googlesource.com/c/go/+/546355 Reviewed-by: David Chase Reviewed-by: Mauri de Souza Meneguzzo Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-on: https://go-review.googlesource.com/c/go/+/546356 --- src/cmd/compile/internal/ssa/memcombine.go | 12 +-- src/cmd/compile/internal/test/memcombine_test.go | 126 +++++++++++++++++++++++ test/codegen/memcombine.go | 37 +++++++ 3 files changed, 169 insertions(+), 6 deletions(-) diff --git a/src/cmd/compile/internal/ssa/memcombine.go b/src/cmd/compile/internal/ssa/memcombine.go index c1346434c9..26fb3f5454 100644 --- a/src/cmd/compile/internal/ssa/memcombine.go +++ b/src/cmd/compile/internal/ssa/memcombine.go @@ -313,8 +313,8 @@ func combineLoads(root *Value, n int64) bool { if isLittleEndian && shift0 != 0 { v = leftShift(loadBlock, pos, v, shift0) } - if isBigEndian && shift0-(n-1)*8 != 0 { - v = leftShift(loadBlock, pos, v, shift0-(n-1)*8) + if isBigEndian && shift0-(n-1)*size*8 != 0 { + v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8) } // Install with (Copy v). @@ -588,14 +588,14 @@ func combineStores(root *Value, n int64) bool { isLittleEndian := true shift0 := shift(a[0].store, shiftBase) for i := int64(1); i < n; i++ { - if shift(a[i].store, shiftBase) != shift0+i*8 { + if shift(a[i].store, shiftBase) != shift0+i*size*8 { isLittleEndian = false break } } isBigEndian := true for i := int64(1); i < n; i++ { - if shift(a[i].store, shiftBase) != shift0-i*8 { + if shift(a[i].store, shiftBase) != shift0-i*size*8 { isBigEndian = false break } @@ -618,8 +618,8 @@ func combineStores(root *Value, n int64) bool { if isLittleEndian && shift0 != 0 { sv = rightShift(root.Block, root.Pos, sv, shift0) } - if isBigEndian && shift0-(n-1)*8 != 0 { - sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*8) + if isBigEndian && shift0-(n-1)*size*8 != 0 { + sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*size*8) } if sv.Type.Size() > size*n { sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n) diff --git a/src/cmd/compile/internal/test/memcombine_test.go b/src/cmd/compile/internal/test/memcombine_test.go index c7e7a208dd..3fc4a004a3 100644 --- a/src/cmd/compile/internal/test/memcombine_test.go +++ b/src/cmd/compile/internal/test/memcombine_test.go @@ -71,3 +71,129 @@ func readUint32be(b []byte) uint64 { //go:noinline func nop() { } + +type T32 struct { + a, b uint32 +} + +//go:noinline +func (t *T32) bigEndianLoad() uint64 { + return uint64(t.a)<<32 | uint64(t.b) +} + +//go:noinline +func (t *T32) littleEndianLoad() uint64 { + return uint64(t.a) | (uint64(t.b) << 32) +} + +//go:noinline +func (t *T32) bigEndianStore(x uint64) { + t.a = uint32(x >> 32) + t.b = uint32(x) +} + +//go:noinline +func (t *T32) littleEndianStore(x uint64) { + t.a = uint32(x) + t.b = uint32(x >> 32) +} + +type T16 struct { + a, b uint16 +} + +//go:noinline +func (t *T16) bigEndianLoad() uint32 { + return uint32(t.a)<<16 | uint32(t.b) +} + +//go:noinline +func (t *T16) littleEndianLoad() uint32 { + return uint32(t.a) | (uint32(t.b) << 16) +} + +//go:noinline +func (t *T16) bigEndianStore(x uint32) { + t.a = uint16(x >> 16) + t.b = uint16(x) +} + +//go:noinline +func (t *T16) littleEndianStore(x uint32) { + t.a = uint16(x) + t.b = uint16(x >> 16) +} + +type T8 struct { + a, b uint8 +} + +//go:noinline +func (t *T8) bigEndianLoad() uint16 { + return uint16(t.a)<<8 | uint16(t.b) +} + +//go:noinline +func (t *T8) littleEndianLoad() uint16 { + return uint16(t.a) | (uint16(t.b) << 8) +} + +//go:noinline +func (t *T8) bigEndianStore(x uint16) { + t.a = uint8(x >> 8) + t.b = uint8(x) +} + +//go:noinline +func (t *T8) littleEndianStore(x uint16) { + t.a = uint8(x) + t.b = uint8(x >> 8) +} + +func TestIssue64468(t *testing.T) { + t32 := T32{1, 2} + if got, want := t32.bigEndianLoad(), uint64(1<<32+2); got != want { + t.Errorf("T32.bigEndianLoad got %x want %x\n", got, want) + } + if got, want := t32.littleEndianLoad(), uint64(1+2<<32); got != want { + t.Errorf("T32.littleEndianLoad got %x want %x\n", got, want) + } + t16 := T16{1, 2} + if got, want := t16.bigEndianLoad(), uint32(1<<16+2); got != want { + t.Errorf("T16.bigEndianLoad got %x want %x\n", got, want) + } + if got, want := t16.littleEndianLoad(), uint32(1+2<<16); got != want { + t.Errorf("T16.littleEndianLoad got %x want %x\n", got, want) + } + t8 := T8{1, 2} + if got, want := t8.bigEndianLoad(), uint16(1<<8+2); got != want { + t.Errorf("T8.bigEndianLoad got %x want %x\n", got, want) + } + if got, want := t8.littleEndianLoad(), uint16(1+2<<8); got != want { + t.Errorf("T8.littleEndianLoad got %x want %x\n", got, want) + } + t32.bigEndianStore(1<<32 + 2) + if got, want := t32, (T32{1, 2}); got != want { + t.Errorf("T32.bigEndianStore got %x want %x\n", got, want) + } + t32.littleEndianStore(1<<32 + 2) + if got, want := t32, (T32{2, 1}); got != want { + t.Errorf("T32.littleEndianStore got %x want %x\n", got, want) + } + t16.bigEndianStore(1<<16 + 2) + if got, want := t16, (T16{1, 2}); got != want { + t.Errorf("T16.bigEndianStore got %x want %x\n", got, want) + } + t16.littleEndianStore(1<<16 + 2) + if got, want := t16, (T16{2, 1}); got != want { + t.Errorf("T16.littleEndianStore got %x want %x\n", got, want) + } + t8.bigEndianStore(1<<8 + 2) + if got, want := t8, (T8{1, 2}); got != want { + t.Errorf("T8.bigEndianStore got %x want %x\n", got, want) + } + t8.littleEndianStore(1<<8 + 2) + if got, want := t8, (T8{2, 1}); got != want { + t.Errorf("T8.littleEndianStore got %x want %x\n", got, want) + } +} diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index 0d1c390dfc..3cf4fbc0ae 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -836,3 +836,40 @@ func zero_uint64_2(d1, d2 []uint64) { d1[0], d1[1] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH" d2[1], d2[0] = 0, 0 // arm64:"STP",-"MOVB",-"MOVH" } + +func store32le(p *struct{ a, b uint32 }, x uint64) { + // amd64:"MOVQ",-"MOVL",-"SHRQ" + // arm64:"MOVD",-"MOVW",-"LSR" + // ppc64le:"MOVD",-"MOVW",-"SRD" + p.a = uint32(x) + // amd64:-"MOVL",-"SHRQ" + // arm64:-"MOVW",-"LSR" + // ppc64le:-"MOVW",-"SRD" + p.b = uint32(x >> 32) +} +func store32be(p *struct{ a, b uint32 }, x uint64) { + // ppc64:"MOVD",-"MOVW",-"SRD" + // s390x:"MOVD",-"MOVW",-"SRD" + p.a = uint32(x >> 32) + // ppc64:-"MOVW",-"SRD" + // s390x:-"MOVW",-"SRD" + p.b = uint32(x) +} +func store16le(p *struct{ a, b uint16 }, x uint32) { + // amd64:"MOVL",-"MOVW",-"SHRL" + // arm64:"MOVW",-"MOVH",-"UBFX" + // ppc64le:"MOVW",-"MOVH",-"SRW" + p.a = uint16(x) + // amd64:-"MOVW",-"SHRL" + // arm64:-"MOVH",-"UBFX" + // ppc64le:-"MOVH",-"SRW" + p.b = uint16(x >> 16) +} +func store16be(p *struct{ a, b uint16 }, x uint32) { + // ppc64:"MOVW",-"MOVH",-"SRW" + // s390x:"MOVW",-"MOVH",-"SRW" + p.a = uint16(x >> 16) + // ppc64:-"MOVH",-"SRW" + // s390x:-"MOVH",-"SRW" + p.b = uint16(x) +} -- cgit v1.2.3-54-g00ecf