diff options
author | Paul E. Murphy <murp@ibm.com> | 2020-10-23 12:12:34 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-10-27 18:33:20 +0000 |
commit | c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch) | |
tree | 67a121a1ecd4bec56887f14e5894231017e73bb9 /src/cmd/compile/internal/ssa/rewrite_test.go | |
parent | e3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff) | |
download | go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip |
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m
and the shift value produce a constant which can be encoded into an
RLWINM instruction.
Combine (CLRLSLDI (SRWconst x)) if the combining of the underlying rotate
masks produces a constant which can be encoded into RLWINM.
Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)).
Combine rotate word + AND operations which can be encoded as a single
RLWINM/RLWNM instruction.
The most notable performance improvements arise from the crypto
benchmarks below (GOPPC64=power8 on ppc64le/linux):
pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88%
ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15%
pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key 57.6ms ± 0% 52.3ms ± 0% -9.13%
pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal 90.9ms ± 0% 82.6ms ± 0% -9.13%
DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12%
Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewrite_test.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/rewrite_test.go | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewrite_test.go b/src/cmd/compile/internal/ssa/rewrite_test.go index 1a15d8c940..6fe429e85a 100644 --- a/src/cmd/compile/internal/ssa/rewrite_test.go +++ b/src/cmd/compile/internal/ssa/rewrite_test.go @@ -36,3 +36,184 @@ func TestSubFlags(t *testing.T) { t.Errorf("subFlags32(0,1).ult() returned false") } } + +func TestIsPPC64WordRotateMask(t *testing.T) { + tests := []struct { + input int64 + expected bool + }{ + {0x00000001, true}, + {0x80000001, true}, + {0x80010001, false}, + {0xFFFFFFFA, false}, + {0xF0F0F0F0, false}, + {0xFFFFFFFD, true}, + {0x80000000, true}, + {0x00000000, false}, + {0xFFFFFFFF, true}, + {0x0000FFFF, true}, + {0xFF0000FF, true}, + {0x00FFFF00, true}, + } + + for _, v := range tests { + if v.expected != isPPC64WordRotateMask(v.input) { + t.Errorf("isPPC64WordRotateMask(0x%x) failed", v.input) + } + } +} + +func TestEncodeDecodePPC64WordRotateMask(t *testing.T) { + tests := []struct { + rotate int64 + mask uint64 + nbits, + mb, + me, + encoded int64 + }{ + {1, 0x00000001, 32, 31, 31, 0x20011f20}, + {2, 0x80000001, 32, 31, 0, 0x20021f01}, + {3, 0xFFFFFFFD, 32, 31, 29, 0x20031f1e}, + {4, 0x80000000, 32, 0, 0, 0x20040001}, + {5, 0xFFFFFFFF, 32, 0, 31, 0x20050020}, + {6, 0x0000FFFF, 32, 16, 31, 0x20061020}, + {7, 0xFF0000FF, 32, 24, 7, 0x20071808}, + {8, 0x00FFFF00, 32, 8, 23, 0x20080818}, + + {9, 0x0000000000FFFF00, 64, 40, 55, 0x40092838}, + {10, 0xFFFF000000000000, 64, 0, 15, 0x400A0010}, + {10, 0xFFFF000000000001, 64, 63, 15, 0x400A3f10}, + } + + for i, v := range tests { + result := encodePPC64RotateMask(v.rotate, int64(v.mask), v.nbits) + if result != v.encoded { + t.Errorf("encodePPC64RotateMask(%d,0x%x,%d) = 0x%x, expected 0x%x", v.rotate, v.mask, v.nbits, result, v.encoded) + } + rotate, mb, me, mask := DecodePPC64RotateMask(result) + if rotate != v.rotate || mb != v.mb || me != v.me || mask != v.mask { + t.Errorf("DecodePPC64Failure(Test %d) got (%d, %d, %d, %x) expected (%d, %d, %d, %x)", i, 
rotate, mb, me, mask, v.rotate, v.mb, v.me, v.mask) + } + } +} + +func TestMergePPC64ClrlsldiSrw(t *testing.T) { + tests := []struct { + clrlsldi int32 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + // ((x>>4)&0xFF)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), 4, true, 0, 0xFF0}, + // ((x>>4)&0xFFFF)<<4 + {newPPC64ShiftAuxInt(4, 48, 63, 64), 4, true, 0, 0xFFFF0}, + // ((x>>4)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), 4, false, 0, 0}, + // ((x>>4)&0xFFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), 4, true, 12, 0xFFFF0000}, + // ((x>>32)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64ClrlsldiSrw(int64(v.clrlsldi), v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64ClrlsldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64ClrlsldiRlwinm(t *testing.T) { + tests := []struct { + clrlsldi int32 + rlwinm int64 + valid bool + rotate int64 + mask uint64 + }{ + // ((x<<4)&0xFF00)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(4, 0xFF00, 32), false, 0, 0}, + // ((x>>4)&0xFF)<<4 + {newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(28, 0x0FFFFFFF, 32), true, 0, 0xFF0}, + // ((x>>4)&0xFFFF)<<4 + {newPPC64ShiftAuxInt(4, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 0, 0xFFFF0}, + // ((x>>4)&0xFFFF)<<17 + {newPPC64ShiftAuxInt(17, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), false, 0, 0}, + // ((x>>4)&0xFFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 12, 0xFFFF0000}, + // ((x>>4)&0xF000FFFF)<<16 + {newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xF000FFFF, 32), true, 
12, 0xFFFF0000}, + } + for i, v := range tests { + result := mergePPC64ClrlsldiRlwinm(v.clrlsldi, v.rlwinm) + if v.valid && result == 0 { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64SldiSrw(t *testing.T) { + tests := []struct { + sld int64 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + {4, 4, true, 0, 0xFFFFFFF0}, + {4, 8, true, 28, 0x0FFFFFF0}, + {0, 0, true, 0, 0xFFFFFFFF}, + {8, 4, false, 0, 0}, + {0, 32, false, 0, 0}, + {0, 31, true, 1, 0x1}, + {31, 31, true, 0, 0x80000000}, + {32, 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64SldiSrw(v.sld, v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64SldiSrw(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64SldiSrw(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); v.rotate != r || v.mask != m { + t.Errorf("mergePPC64SldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} + +func TestMergePPC64AndSrwi(t *testing.T) { + tests := []struct { + and int64 + srw int64 + valid bool + rotate int64 + mask uint64 + }{ + {0x000000FF, 8, true, 24, 0xFF}, + {0xF00000FF, 8, true, 24, 0xFF}, + {0x0F0000FF, 4, false, 0, 0}, + {0x00000000, 4, false, 0, 0}, + {0xF0000000, 4, false, 0, 0}, + {0xF0000000, 32, false, 0, 0}, + } + for i, v := range tests { + result := mergePPC64AndSrwi(v.and, v.srw) + if v.valid && result == 0 { + t.Errorf("mergePPC64AndSrwi(Test %d) did not merge", i) + } else if !v.valid && result != 0 { + t.Errorf("mergePPC64AndSrwi(Test %d) should return 0", i) + } else if r, _, _, m := DecodePPC64RotateMask(result); 
v.rotate != r || v.mask != m { + t.Errorf("mergePPC64AndSrwi(Test %d) got (%d,0x%x) expected (%d,0x%x)", i, r, m, v.rotate, v.mask) + } + } +} |