path: root/src/runtime/memmove_amd64.s
author     Keith Randall <khr@golang.org>  2016-03-06 16:58:30 -0800
committer  Keith Randall <khr@golang.org>  2016-03-21 19:10:24 +0000
commit     6a33f7765f79cf2f00f5ca55832d2cfab8beb289 (patch)
tree       d1cd6a803e32d952e60290c4c863c65bd88db808 /src/runtime/memmove_amd64.s
parent     b07a214d39814545bbcd1d30f1850a95752dac65 (diff)
download   go-6a33f7765f79cf2f00f5ca55832d2cfab8beb289.tar.gz
           go-6a33f7765f79cf2f00f5ca55832d2cfab8beb289.zip
runtime: use MOVSB instead of MOVSQ for unaligned moves
MOVSB is quite a bit faster for unaligned moves. Possibly we should use
MOVSB all of the time, but Intel folks say it might be a bit faster to
use MOVSQ on some processors (but not any I have access to at the moment).

benchmark                            old ns/op  new ns/op  delta
BenchmarkMemmove4096-8               93.9       93.2       -0.75%
BenchmarkMemmoveUnalignedDst4096-8   256        151        -41.02%
BenchmarkMemmoveUnalignedSrc4096-8   175        90.5       -48.29%

Fixes #14630

Change-Id: I568e6d6590eb3615e6a699fb474020596be665ff
Reviewed-on: https://go-review.googlesource.com/20293
Reviewed-by: Ian Lance Taylor <iant@golang.org>
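Benchmarks in the style of the table above can be written against the
standard testing package. The sketch below is illustrative, not the
upstream benchmark (the upstream versions live in src/runtime/memmove_test.go);
the 1-byte offset into the destination is an assumption used here to force
an unaligned destination pointer:

// memmove_sketch_test.go
package memmove_test

import "testing"

// Copy 4096 bytes into a destination that starts 1 byte into its
// allocation, so the destination pointer is not 8-byte aligned.
func BenchmarkMemmoveUnalignedDst4096(b *testing.B) {
	src := make([]byte, 4096)
	dst := make([]byte, 4096+1)
	b.SetBytes(4096)
	for i := 0; i < b.N; i++ {
		copy(dst[1:], src) // copy of []byte lowers to a runtime memmove call
	}
}

Run it with 'go test -bench=MemmoveUnalignedDst'. The -8 suffix in the
results above is the GOMAXPROCS value, not part of the benchmark name.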
Diffstat (limited to 'src/runtime/memmove_amd64.s')
-rw-r--r--  src/runtime/memmove_amd64.s  13
1 file changed, 13 insertions(+), 0 deletions(-)
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s
index ae95b155be..514eb169f1 100644
--- a/src/runtime/memmove_amd64.s
+++ b/src/runtime/memmove_amd64.s
@@ -77,12 +77,25 @@ forward:
CMPQ BX, $2048
JLS move_256through2048
+ // Check alignment
+ MOVQ SI, AX
+ ORQ DI, AX
+ TESTL $7, AX
+ JNE unaligned_fwd
+
+ // Aligned - do 8 bytes at a time
MOVQ BX, CX
SHRQ $3, CX
ANDQ $7, BX
REP; MOVSQ
JMP tail
+unaligned_fwd:
+ // Unaligned - do 1 byte at a time
+ MOVQ BX, CX
+ REP; MOVSB
+ RET
+
back:
/*
* check overlap
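
To make the new control flow easier to follow outside of assembly, here is
a minimal Go sketch of what the forward path above does (illustrative only;
the name forwardCopy is invented for this note, and this is not the runtime
implementation): if either pointer has any of its low 3 bits set, the
buffers are not 8-byte aligned and the whole move is done a byte at a time
(REP; MOVSB); otherwise it moves 8 bytes at a time (REP; MOVSQ) and lets
the tail code finish the remaining n%8 bytes.

package main

import "unsafe"

// forwardCopy is an illustrative stand-in for the assembly's forward path.
// NOTE: uintptr arithmetic like this is not GC-safe in real Go code; it is
// used here only to mirror the pointer math the assembly performs.
func forwardCopy(dst, src unsafe.Pointer, n uintptr) {
	d, s := uintptr(dst), uintptr(src)
	if (d|s)&7 != 0 { // TESTL $7, AX: is either pointer unaligned?
		// unaligned_fwd: one byte at a time (REP; MOVSB), then return
		for i := uintptr(0); i < n; i++ {
			*(*byte)(unsafe.Pointer(d + i)) = *(*byte)(unsafe.Pointer(s + i))
		}
		return
	}
	// Aligned: 8 bytes at a time (REP; MOVSQ)...
	i := uintptr(0)
	for ; i+8 <= n; i += 8 {
		*(*uint64)(unsafe.Pointer(d + i)) = *(*uint64)(unsafe.Pointer(s + i))
	}
	// ...then the tail handles the remaining n%8 bytes (JMP tail).
	for ; i < n; i++ {
		*(*byte)(unsafe.Pointer(d + i)) = *(*byte)(unsafe.Pointer(s + i))
	}
}

func main() {
	src := []byte("example payload for forwardCopy")
	dst := make([]byte, len(src))
	forwardCopy(unsafe.Pointer(&dst[0]), unsafe.Pointer(&src[0]), uintptr(len(src)))
	println(string(dst))
}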