diff options
author | Wei Xiao <wei.xiao@arm.com> | 2017-07-27 01:55:03 +0000 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2017-08-25 20:09:06 +0000 |
commit | c02fc1605ad1816f95f61811883dbcdb38a9aec6 (patch) | |
tree | 4a73cdd269e29b0f08e210e34ebcded11a4f6ff0 /src/cmd/compile/internal/ssa/gen/ARM64Ops.go | |
parent | 9c99512d188b01557f5271b2c65d814487817920 (diff) | |
download | go-c02fc1605ad1816f95f61811883dbcdb38a9aec6.tar.gz go-c02fc1605ad1816f95f61811883dbcdb38a9aec6.zip |
cmd/compile: memory clearing optimization for arm64
Use "STP (ZR, ZR), O(R)" instead of "MOVD ZR, O(R)" to implement memory clearing.
Also improve assembler support for STP/LDP.
Results (A57@2GHzx8):
benchmark old ns/op new ns/op delta
BenchmarkClearFat8-8 1.00 1.00 +0.00%
BenchmarkClearFat12-8 1.01 1.01 +0.00%
BenchmarkClearFat16-8 1.01 1.01 +0.00%
BenchmarkClearFat24-8 1.52 1.52 +0.00%
BenchmarkClearFat32-8 3.00 2.02 -32.67%
BenchmarkClearFat40-8 3.50 2.52 -28.00%
BenchmarkClearFat48-8 3.50 3.03 -13.43%
BenchmarkClearFat56-8 4.00 3.50 -12.50%
BenchmarkClearFat64-8 4.25 4.00 -5.88%
BenchmarkClearFat128-8 8.01 8.01 +0.00%
BenchmarkClearFat256-8 16.1 16.0 -0.62%
BenchmarkClearFat512-8 32.1 32.0 -0.31%
BenchmarkClearFat1024-8 64.1 64.1 +0.00%
Change-Id: Ie5f5eac271ff685884775005825f206167a5c146
Reviewed-on: https://go-review.googlesource.com/55610
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64Ops.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 13 |
1 files changed, 8 insertions, 5 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 3b3d494c54..1cac97f3ae 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -144,6 +144,7 @@ func init() { gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}} + gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} gpxchg = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} gpcas = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}, outputs: []regMask{gp}} fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} @@ -275,13 +276,15 @@ func init() { {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. + {name: "STP", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 and arg2 to arg0 + auxInt + aux. arg3=mem. {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. 
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. - {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. ar12=mem. + {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. + {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. // conversions {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte @@ -347,7 +350,7 @@ func init() { aux: "Int64", argLength: 2, reg: regInfo{ - inputs: []regMask{gp}, + inputs: []regMask{buildReg("R16")}, clobbers: buildReg("R16 R30"), }, faultOnNilArg0: true, @@ -355,14 +358,14 @@ func init() { // large zeroing // arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect) - // arg1 = address of the last element to zero + // arg1 = address of the last 16-byte unit to zero // arg2 = mem // returns mem - // MOVD.P ZR, 8(R16) + // STP.P (ZR,ZR), 16(R16) // CMP Rarg1, R16 // BLE -2(PC) // Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled. - // the-end-of-the-memory - 8 is with the area to zero, ok to spill. 
+ // the-end-of-the-memory - 16 is with the area to zero, ok to spill. { name: "LoweredZero", argLength: 3, |