aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRick Hudson <rlh@golang.org>2015-02-19 18:11:24 -0500
committerRick Hudson <rlh@golang.org>2015-02-23 18:52:43 +0000
commit99482f2f9e7710206386ff45869cb76a53e0ac76 (patch)
tree7d0d9c476fcf6d82035bc3c124e7a0e70766e453
parent96333a7e4841c37d4c70cdab8748f9e69012cf29 (diff)
downloadgo-99482f2f9e7710206386ff45869cb76a53e0ac76.tar.gz
go-99482f2f9e7710206386ff45869cb76a53e0ac76.zip
runtime: Add prefetch to allocation code
The routine mallocgc retrieves objects from freelists. Prefetch the object that will be returned in the next call to mallocgc. Experiments indicate that this produces a 1% improvement when using prefetchnta and less when using prefetcht0, prefetcht1, or prefetcht2. Benchmark numbers indicate a 1% improvement over no prefetch, much less over prefetcht0, prefetcht1, and prefetcht2. These numbers were for the garbage benchmark with MAXPROCS=4 no prefetch >> 5.96 / 5.77 / 5.89 prefetcht0(uintptr(v.ptr().next)) >> 5.88 / 6.17 / 5.84 prefetcht1(uintptr(v.ptr().next)) >> 5.88 / 5.89 / 5.91 prefetcht2(uintptr(v.ptr().next)) >> 5.87 / 6.47 / 5.92 prefetchnta(uintptr(v.ptr().next)) >> 5.72 / 5.84 / 5.85 Change-Id: I54e07172081cccb097d5b5ce8789d74daa055ed9 Reviewed-on: https://go-review.googlesource.com/5350 Reviewed-by: Dmitry Vyukov <dvyukov@google.com> Reviewed-by: Austin Clements <austin@google.com>
-rw-r--r--src/runtime/malloc.go6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 475f97fd05..fac5ca49ce 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -553,7 +553,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
}
s.freelist = v.ptr().next
s.ref++
- //TODO: prefetch v.next
+ // prefetchnta offers best performance, see change list message.
+ prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
@@ -584,7 +585,8 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
}
s.freelist = v.ptr().next
s.ref++
- //TODO: prefetch
+ // prefetchnta offers best performance, see change list message.
+ prefetchnta(uintptr(v.ptr().next))
x = unsafe.Pointer(v)
if flags&flagNoZero == 0 {
v.ptr().next = 0