aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/memclr_ppc64x.s
diff options
context:
space:
mode:
authorMichael Hudson-Doyle <michael.hudson@canonical.com>2015-09-22 22:35:52 +1200
committerMichael Hudson-Doyle <michael.hudson@canonical.com>2015-10-02 07:50:52 +0000
commit2c911143fda5f9d1aa46d02cc80bd60f9ba17775 (patch)
treea05bb0861f5d3211e71461cd16c7ad9631950c5e /src/runtime/memclr_ppc64x.s
parent9fb79380f069348b317865aafa4023d3013137cb (diff)
downloadgo-2c911143fda5f9d1aa46d02cc80bd60f9ba17775.tar.gz
go-2c911143fda5f9d1aa46d02cc80bd60f9ba17775.zip
runtime: adjust the ppc64x memmove and memclr to copy by word as much as it can
Issue #12552 can happen on ppc64 too, although much less frequently in my testing. I'm fairly sure this fixes it (2 out of 200 runs of oracle.test failed without this change and 0 of 200 failed with it). It's also a lot faster for large moves/clears: name old speed new speed delta Memmove1-6 157MB/s ± 9% 144MB/s ± 0% -8.20% (p=0.004 n=10+9) Memmove2-6 281MB/s ± 1% 249MB/s ± 1% -11.53% (p=0.000 n=10+10) Memmove3-6 376MB/s ± 1% 328MB/s ± 1% -12.64% (p=0.000 n=10+10) Memmove4-6 475MB/s ± 4% 345MB/s ± 1% -27.28% (p=0.000 n=10+8) Memmove5-6 540MB/s ± 1% 393MB/s ± 0% -27.21% (p=0.000 n=10+10) Memmove6-6 609MB/s ± 0% 423MB/s ± 0% -30.56% (p=0.000 n=9+10) Memmove7-6 659MB/s ± 0% 468MB/s ± 0% -28.99% (p=0.000 n=8+10) Memmove8-6 705MB/s ± 0% 1295MB/s ± 1% +83.73% (p=0.000 n=9+9) Memmove9-6 740MB/s ± 1% 1241MB/s ± 1% +67.61% (p=0.000 n=10+8) Memmove10-6 780MB/s ± 0% 1162MB/s ± 1% +48.95% (p=0.000 n=10+9) Memmove11-6 811MB/s ± 0% 1180MB/s ± 0% +45.58% (p=0.000 n=8+9) Memmove12-6 820MB/s ± 1% 1073MB/s ± 1% +30.83% (p=0.000 n=10+9) Memmove13-6 849MB/s ± 0% 1068MB/s ± 1% +25.87% (p=0.000 n=10+10) Memmove14-6 877MB/s ± 0% 911MB/s ± 0% +3.83% (p=0.000 n=10+10) Memmove15-6 893MB/s ± 0% 922MB/s ± 0% +3.25% (p=0.000 n=10+9) Memmove16-6 897MB/s ± 1% 2418MB/s ± 1% +169.67% (p=0.000 n=10+9) Memmove32-6 908MB/s ± 0% 3927MB/s ± 2% +332.64% (p=0.000 n=10+8) Memmove64-6 1.11GB/s ± 0% 5.59GB/s ± 0% +404.64% (p=0.000 n=9+9) Memmove128-6 1.25GB/s ± 0% 6.71GB/s ± 2% +437.49% (p=0.000 n=9+10) Memmove256-6 1.33GB/s ± 0% 7.25GB/s ± 1% +445.06% (p=0.000 n=10+10) Memmove512-6 1.38GB/s ± 0% 8.87GB/s ± 0% +544.43% (p=0.000 n=10+10) Memmove1024-6 1.40GB/s ± 0% 10.00GB/s ± 0% +613.80% (p=0.000 n=10+10) Memmove2048-6 1.41GB/s ± 0% 10.65GB/s ± 0% +652.95% (p=0.000 n=9+10) Memmove4096-6 1.42GB/s ± 0% 11.01GB/s ± 0% +675.37% (p=0.000 n=8+10) Memclr5-6 269MB/s ± 1% 264MB/s ± 0% -1.80% (p=0.000 n=10+10) Memclr16-6 600MB/s ± 0% 887MB/s ± 1% +47.83% (p=0.000 n=10+10) Memclr64-6 1.06GB/s ± 0% 2.91GB/s ± 1% +174.58% (p=0.000 n=8+10) Memclr256-6 1.32GB/s ± 0% 6.58GB/s ± 0% +399.86% (p=0.000 n=9+10) Memclr4096-6 1.42GB/s ± 0% 10.90GB/s ± 0% +668.03% (p=0.000 n=8+10) Memclr65536-6 1.43GB/s ± 0% 11.37GB/s ± 0% +697.83% (p=0.000 n=9+8) GoMemclr5-6 359MB/s ± 0% 360MB/s ± 0% +0.46% (p=0.000 n=10+10) GoMemclr16-6 750MB/s ± 0% 1264MB/s ± 1% +68.45% (p=0.000 n=10+10) GoMemclr64-6 1.17GB/s ± 0% 3.78GB/s ± 1% +223.58% (p=0.000 n=10+9) GoMemclr256-6 1.35GB/s ± 0% 7.47GB/s ± 0% +452.44% (p=0.000 n=10+10) Update #12552 Change-Id: I7192e9deb9684a843aed37f58a16a4e29970e893 Reviewed-on: https://go-review.googlesource.com/14840 Reviewed-by: Minux Ma <minux@golang.org>
Diffstat (limited to 'src/runtime/memclr_ppc64x.s')
-rw-r--r--src/runtime/memclr_ppc64x.s17
1 files changed, 14 insertions, 3 deletions
diff --git a/src/runtime/memclr_ppc64x.s b/src/runtime/memclr_ppc64x.s
index cea42cb70c..90e27482ff 100644
--- a/src/runtime/memclr_ppc64x.s
+++ b/src/runtime/memclr_ppc64x.s
@@ -10,11 +10,22 @@
TEXT runtime·memclr(SB),NOSPLIT,$0-16
MOVD ptr+0(FP), R3
MOVD n+8(FP), R4
- CMP R4, $0
+ SRADCC $3, R4, R6 // R6 is the number of words to zero
+ BEQ bytes
+
+ SUB $8, R3
+ MOVD R6, CTR
+ MOVDU R0, 8(R3)
+ BC 25, 0, -1(PC) // bdnz+ $-4
+ ADD $8, R3
+
+bytes:
+ ANDCC $7, R4, R7 // R7 is the number of bytes to zero
BEQ done
SUB $1, R3
- MOVD R4, CTR
+ MOVD R7, CTR
MOVBU R0, 1(R3)
- BC 25, 0, -1(PC) // bdnz+ $-4
+ BC 25, 0, -1(PC) // bdnz+ $-4
+
done:
RET