aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorXiangdong Ji <xiangdong.ji@arm.com>2020-06-01 11:01:14 +0000
committerKeith Randall <khr@golang.org>2020-06-09 15:50:33 +0000
commite031318ca6da8db8a08ecff734ae72290dfb5f2d (patch)
tree1ac1f28637427a42f3726b5734a0931f0534a6b4 /test
parentbdcf33f1c0597bf18924c17ab2644310d204bf23 (diff)
downloadgo-e031318ca6da8db8a08ecff734ae72290dfb5f2d.tar.gz
go-e031318ca6da8db8a08ecff734ae72290dfb5f2d.zip
cmd/compile: ARM comparisons with 0 incorrect on overflow
Some ARM rewriting rules convert 'comparing to zero' conditions of if statements to a simplified version utilizing CMN and CMP instructions to branch over condition flags, in order to save one Add or Sub caculation. Such optimizations lead to wrong branching in case an overflow/underflow occurs when executing CMN or CMP. Fix the issue by introducing new block opcodes that don't honor the overflow/underflow flag: Block-Op Meaning ARM condition codes 1. LTnoov less than MI 2. GEnoov greater than or equal PL 3. LEnoov less than or equal MI || EQ 4. GTnoov greater than NEQ & PL The patch also adds a few test cases to cover scenarios that are specific to ARM and fine-tunes the code generation tests for 'x-const'. For more details please refer to the previous fix on 64-bit ARM: https://go-review.googlesource.com/c/go/+/233097 Go1 perf, 'old' is the non-optimized version, that is removing all concerned rewriting rules. name old time/op new time/op delta BinaryTree17-8 7.73s ± 0% 7.81s ± 0% +0.97% (p=0.000 n=7+8) Fannkuch11-8 7.06s ± 0% 7.00s ± 0% -0.83% (p=0.000 n=8+8) FmtFprintfEmpty-8 181ns ± 1% 183ns ± 1% +1.31% (p=0.001 n=8+8) FmtFprintfString-8 319ns ± 1% 325ns ± 2% +1.71% (p=0.009 n=7+8) FmtFprintfInt-8 358ns ± 1% 359ns ± 1% ~ (p=0.293 n=7+7) FmtFprintfIntInt-8 459ns ± 3% 456ns ± 1% ~ (p=0.869 n=8+8) FmtFprintfPrefixedInt-8 535ns ± 4% 538ns ± 4% ~ (p=0.572 n=8+8) FmtFprintfFloat-8 1.01µs ± 2% 1.01µs ± 2% ~ (p=0.625 n=8+8) FmtManyArgs-8 1.93µs ± 2% 1.93µs ± 1% ~ (p=0.979 n=8+7) GobDecode-8 16.1ms ± 1% 16.5ms ± 1% +2.32% (p=0.000 n=8+8) GobEncode-8 15.9ms ± 0% 15.8ms ± 1% -1.00% (p=0.000 n=8+7) Gzip-8 690ms ± 1% 670ms ± 0% -2.90% (p=0.000 n=8+8) Gunzip-8 109ms ± 1% 109ms ± 1% ~ (p=0.694 n=7+8) HTTPClientServer-8 149µs ± 3% 146µs ± 2% -1.70% (p=0.028 n=8+8) JSONEncode-8 50.5ms ± 1% 49.2ms ± 0% -2.60% (p=0.001 n=7+7) JSONDecode-8 135ms ± 2% 137ms ± 1% ~ (p=0.054 n=8+7) Mandelbrot200-8 951ms ± 0% 952ms ± 0% ~ (p=0.852 n=6+8) GoParse-8 9.47ms ± 1% 9.66ms ± 1% +2.01% (p=0.000 n=8+8) RegexpMatchEasy0_32-8 288ns ± 2% 277ns ± 2% -3.61% (p=0.000 n=8+8) RegexpMatchEasy0_1K-8 1.66µs ± 1% 1.69µs ± 2% +2.21% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 334ns ± 1% 305ns ± 2% -8.86% (p=0.000 n=8+8) RegexpMatchEasy1_1K-8 2.14µs ± 2% 2.15µs ± 0% ~ (p=0.099 n=8+8) RegexpMatchMedium_32-8 13.3ns ± 1% 13.3ns ± 0% ~ (p=1.000 n=7+7) RegexpMatchMedium_1K-8 81.1µs ± 3% 80.7µs ± 1% ~ (p=0.955 n=7+8) RegexpMatchHard_32-8 4.26µs ± 0% 4.26µs ± 0% ~ (p=0.933 n=7+8) RegexpMatchHard_1K-8 124µs ± 0% 124µs ± 0% +0.31% (p=0.000 n=8+8) Revcomp-8 14.7ms ± 2% 14.5ms ± 1% -1.66% (p=0.003 n=8+8) Template-8 197ms ± 2% 200ms ± 3% +1.62% (p=0.021 n=8+8) TimeParse-8 1.33µs ± 1% 1.30µs ± 1% -1.86% (p=0.002 n=8+8) TimeFormat-8 3.04µs ± 1% 3.02µs ± 0% -0.60% (p=0.000 n=8+8) name old speed new speed delta GobDecode-8 47.6MB/s ± 1% 46.5MB/s ± 1% -2.28% (p=0.000 n=8+8) GobEncode-8 48.1MB/s ± 0% 48.6MB/s ± 1% +1.02% (p=0.000 n=8+7) Gzip-8 28.1MB/s ± 1% 29.0MB/s ± 0% +2.97% (p=0.000 n=8+8) Gunzip-8 178MB/s ± 1% 179MB/s ± 2% ~ (p=0.694 n=7+8) JSONEncode-8 38.4MB/s ± 1% 39.4MB/s ± 0% +2.67% (p=0.001 n=7+7) JSONDecode-8 14.3MB/s ± 2% 14.2MB/s ± 1% -0.81% (p=0.043 n=8+7) GoParse-8 6.12MB/s ± 1% 5.99MB/s ± 1% -2.00% (p=0.000 n=8+8) RegexpMatchEasy0_32-8 111MB/s ± 2% 115MB/s ± 2% +3.77% (p=0.000 n=8+8) RegexpMatchEasy0_1K-8 618MB/s ± 1% 604MB/s ± 2% -2.16% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 95.7MB/s ± 1% 105.1MB/s ± 2% +9.76% (p=0.000 n=8+8) RegexpMatchEasy1_1K-8 479MB/s ± 2% 477MB/s ± 0% ~ (p=0.105 n=8+8) RegexpMatchMedium_32-8 75.2MB/s ± 1% 75.2MB/s ± 0% ~ (p=0.247 n=7+7) RegexpMatchMedium_1K-8 12.6MB/s ± 3% 12.7MB/s ± 1% ~ (p=0.538 n=7+8) RegexpMatchHard_32-8 7.52MB/s ± 0% 7.52MB/s ± 0% ~ (p=0.968 n=7+8) RegexpMatchHard_1K-8 8.26MB/s ± 0% 8.24MB/s ± 0% -0.30% (p=0.001 n=8+8) Revcomp-8 173MB/s ± 2% 176MB/s ± 1% +1.68% (p=0.003 n=8+8) Template-8 9.85MB/s ± 2% 9.69MB/s ± 3% -1.59% (p=0.021 n=8+8) Fixes #39303 Updates #38740 Change-Id: I0a5f87bfda679f66414c0041ace2ca2e28363f36 Reviewed-on: https://go-review.googlesource.com/c/go/+/236637 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'test')
-rw-r--r--test/codegen/comparisons.go33
1 files changed, 28 insertions, 5 deletions
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index eb2f3317c95..90808573c20 100644
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -253,6 +253,8 @@ func CmpLogicalToZero(a, b, c uint32, d, e uint64) uint64 {
// 'comparing to zero' expressions
// var + const
+// 'x-const' might be canonicalized to 'x+(-const)', so we check both
+// CMN and CMP for subtraction expressions to make the pattern robust.
func CmpToZero_ex1(a int64, e int32) int {
// arm64:`CMN`,-`ADD`,`(BMI|BPL)`
if a+3 < 0 {
@@ -269,37 +271,41 @@ func CmpToZero_ex1(a int64, e int32) int {
return 2
}
- // arm64:`CMP`,-`SUB`,`(BMI|BPL)`
+ // arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
if a-7 < 0 {
return 3
}
- // arm64:`CMP`,-`SUB`,`(BMI|BPL)`
+ // arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
if a-11 >= 0 {
return 4
}
- // arm64:`CMP`,-`SUB`,`BEQ`,`(BMI|BPL)`
+ // arm64:`CMP|CMN`,-`(ADD|SUB)`,`BEQ`,`(BMI|BPL)`
if a-19 > 0 {
return 4
}
// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
if e+3 < 0 {
return 5
}
// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
if e+13 >= 0 {
return 6
}
- // arm64:`CMPW`,-`SUBW`,`(BMI|BPL)`
+ // arm64:`CMPW|CMNW`,`(BMI|BPL)`
+ // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
if e-7 < 0 {
return 7
}
- // arm64:`CMPW`,-`SUBW`,`(BMI|BPL)`
+ // arm64:`CMPW|CMNW`,`(BMI|BPL)`
+ // arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
if e-11 >= 0 {
return 8
}
@@ -326,11 +332,13 @@ func CmpToZero_ex2(a, b, c int64, e, f, g int32) int {
}
// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
if e+f < 0 {
return 5
}
// arm64:`CMNW`,-`ADDW`,`(BMI|BPL)`
+ // arm:`CMN`,-`ADD`,`(BMI|BPL)`
if f+g >= 0 {
return 6
}
@@ -350,11 +358,13 @@ func CmpToZero_ex3(a, b, c, d int64, e, f, g, h int32) int {
}
// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+ // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
if e+f*g > 0 {
return 5
}
// arm64:`CMNW`,-`MADDW`,`MULW`,`BEQ`,`(BMI|BPL)`
+ // arm:`CMN`,-`MULA`,`MUL`,`BEQ`,`(BMI|BPL)`
if f+g*h <= 0 {
return 6
}
@@ -384,3 +394,16 @@ func CmpToZero_ex4(a, b, c, d int64, e, f, g, h int32) int {
}
return 0
}
+
+func CmpToZero_ex5(e, f int32, u uint32) int {
+ // arm:`CMN`,-`ADD`,`BEQ`,`(BMI|BPL)`
+ if e+f<<1 > 0 {
+ return 1
+ }
+
+ // arm:`CMP`,-`SUB`,`(BMI|BPL)`
+ if f-int32(u>>2) >= 0 {
+ return 2
+ }
+ return 0
+}