diff options
author | Archana R <aravind5@in.ibm.com> | 2021-10-13 03:50:06 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2021-10-19 18:22:59 +0000 |
commit | 6c3cd5d2eb7149c9c1ced7d70c3f4157f27c1588 (patch) | |
tree | 64ef7d5438d1d9e5fd2a8a4464e8fbae0194f0d7 /src/internal | |
parent | ad7db1f90fb66f00f5b020360aabd9f27d1c764f (diff) | |
download | go-6c3cd5d2eb7149c9c1ced7d70c3f4157f27c1588.tar.gz go-6c3cd5d2eb7149c9c1ced7d70c3f4157f27c1588.zip |
internal/bytealg: port bytes.Index and bytes.Count to reg ABI on ppc64x
This change adds support for the reg ABI to the Index and Count
functions for ppc64/ppc64le.
Most Index and Count benchmarks show improvement in performance on
POWER9 with this change. Similar numbers observed on POWER8 and POWER10.
name old time/op new time/op delta
Index/32 71.0ns ± 0% 67.9ns ± 0% -4.42% (p=0.001 n=7+6)
IndexEasy/10 17.5ns ± 0% 17.2ns ± 0% -1.30% (p=0.001 n=7+7)
name old time/op new time/op delta
Count/10 26.6ns ± 0% 25.0ns ± 1% -6.02% (p=0.001 n=7+7)
Count/32 78.6ns ± 0% 74.7ns ± 0% -4.97% (p=0.001 n=7+7)
Count/4K 5.03µs ± 0% 5.03µs ± 0% -0.07% (p=0.000 n=6+7)
CountEasy/10 26.9ns ± 0% 25.2ns ± 1% -6.31% (p=0.001 n=7+7)
CountSingle/32 11.8ns ± 0% 9.9ns ± 0% -15.70% (p=0.002 n=6+6)
Change-Id: Ibd146c04f8107291c55f9e6100b8264dfccc41ae
Reviewed-on: https://go-review.googlesource.com/c/go/+/355509
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/internal')
-rw-r--r-- | src/internal/bytealg/count_ppc64x.s | 24 | ||||
-rw-r--r-- | src/internal/bytealg/index_ppc64x.s | 39 |
2 files changed, 58 insertions, 5 deletions
diff --git a/src/internal/bytealg/count_ppc64x.s b/src/internal/bytealg/count_ppc64x.s index 94163cbd8a..dbafd06edc 100644 --- a/src/internal/bytealg/count_ppc64x.s +++ b/src/internal/bytealg/count_ppc64x.s @@ -8,24 +8,37 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40 +TEXT ·Count<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 +#ifdef GOEXPERIMENT_regabiargs +// R3 = byte array pointer +// R4 = length + MOVBZ R6,R5 // R5 = byte +#else + MOVD b_base+0(FP), R3 // R3 = byte array pointer MOVD b_len+8(FP), R4 // R4 = length MOVBZ c+24(FP), R5 // R5 = byte MOVD $ret+32(FP), R14 // R14 = &ret +#endif BR countbytebody<>(SB) -TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32 +TEXT ·CountString<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-32 +#ifdef GOEXPERIMENT_regabiargs +// R3 = byte array pointer +// R4 = length + MOVBZ R5,R5 // R5 = byte +#else MOVD s_base+0(FP), R3 // R3 = string MOVD s_len+8(FP), R4 // R4 = length MOVBZ c+16(FP), R5 // R5 = byte MOVD $ret+24(FP), R14 // R14 = &ret +#endif BR countbytebody<>(SB) // R3: addr of string // R4: len of string // R5: byte to count -// R14: addr for return value +// R14: addr for return value when not regabi // endianness shouldn't matter since we are just counting and order // is irrelevant TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0 @@ -94,5 +107,10 @@ next2: BR small done: +#ifdef GOEXPERIMENT_regabiargs + MOVD R18, R3 // return count +#else MOVD R18, (R14) // return count +#endif + RET diff --git a/src/internal/bytealg/index_ppc64x.s b/src/internal/bytealg/index_ppc64x.s index 3ed9442125..f587a8ae25 100644 --- a/src/internal/bytealg/index_ppc64x.s +++ b/src/internal/bytealg/index_ppc64x.s @@ -46,12 +46,20 @@ DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908 GLOBL byteswap<>+0(SB), RODATA, $16 -TEXT ·Index(SB), NOSPLIT|NOFRAME, $0-56 +TEXT ·Index<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 +#ifdef GOEXPERIMENT_regabiargs +// R3 = byte array pointer +// R4 = length + MOVD R6,R5 // R5 = separator pointer + MOVD R7,R6 // R6 = separator length +#else MOVD a_base+0(FP), R3 // R3 = byte array pointer MOVD a_len+8(FP), R4 // R4 = length MOVD b_base+24(FP), R5 // R5 = separator pointer MOVD b_len+32(FP), R6 // R6 = separator length MOVD $ret+48(FP), R14 // R14 = &ret +#endif + #ifdef GOARCH_ppc64le MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7 @@ -63,12 +71,15 @@ TEXT ·Index(SB), NOSPLIT|NOFRAME, $0-56 power8: BR indexbody<>(SB) -TEXT ·IndexString(SB), NOSPLIT|NOFRAME, $0-40 +TEXT ·IndexString<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs MOVD a_base+0(FP), R3 // R3 = string MOVD a_len+8(FP), R4 // R4 = length MOVD b_base+16(FP), R5 // R5 = separator pointer MOVD b_len+24(FP), R6 // R6 = separator length MOVD $ret+32(FP), R14 // R14 = &ret +#endif + #ifdef GOARCH_ppc64le MOVBZ internal∕cpu·PPC64+const_offsetPPC64HasPOWER9(SB), R7 @@ -420,8 +431,12 @@ next17: BR index17to32loop // Continue notfound: +#ifdef GOEXPERIMENT_regabiargs + MOVD $-1, R3 // Return -1 if not found +#else MOVD $-1, R8 // Return -1 if not found MOVD R8, (R14) +#endif RET index33plus: @@ -432,12 +447,20 @@ foundR25: SRD $3, R25 // Convert from bits to bytes ADD R25, R7 // Add to current string address SUB R3, R7 // Subtract from start of string +#ifdef GOEXPERIMENT_regabiargs + MOVD R7, R3 // Return byte where found +#else MOVD R7, (R14) // Return byte where found +#endif RET found: SUB R3, R7 // Return byte where found +#ifdef GOEXPERIMENT_regabiargs + MOVD R7, R3 +#else MOVD R7, (R14) +#endif RET TEXT indexbodyp9<>(SB), NOSPLIT|NOFRAME, $0 @@ -746,8 +769,12 @@ next17: BR index17to32loop // Continue notfound: +#ifdef GOEXPERIMENT_regabiargs + MOVD $-1, R3 // Return -1 if not found +#else MOVD $-1, R8 // Return -1 if not found MOVD R8, (R14) +#endif RET index33plus: @@ -758,11 +785,19 @@ foundR25: SRD $3, R25 // Convert from bits to bytes ADD R25, R7 // Add to current string address SUB R3, R7 // Subtract from start of string +#ifdef GOEXPERIMENT_regabiargs + MOVD R7, R3 // Return byte where found +#else MOVD R7, (R14) // Return byte where found +#endif RET found: SUB R3, R7 // Return byte where found +#ifdef GOEXPERIMENT_regabiargs + MOVD R7, R3 +#else MOVD R7, (R14) +#endif RET |