diff options
author | Dmitry Vyukov <dvyukov@google.com> | 2016-06-27 12:23:39 +0200 |
---|---|---|
committer | Dmitry Vyukov <dvyukov@google.com> | 2016-08-19 21:24:28 +0000 |
commit | 14e59511661303ab1406f7c21ee27e58bcd0750e (patch) | |
tree | afe916cde99047bd7e258f5535a1b64c1042d495 /src/runtime/msize.go | |
parent | ab9137dd24e10a9f884475413437cc31e48dbdf7 (diff) | |
download | go-14e59511661303ab1406f7c21ee27e58bcd0750e.tar.gz go-14e59511661303ab1406f7c21ee27e58bcd0750e.zip |
runtime: increase malloc size classes
When we calculate class sizes, in some cases we discard considerable
amounts of memory without an apparent reason. For example, we choose
size 8448 with 6 objects in 7 pages. But we can well use object
size 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
Increase class sizes to the max value that leads to the same
page count/number of objects. Full list of affected size classes:
class 36: pages: 2 size: 1664->1792 +128 (7.69%)
class 39: pages: 1 size: 2560->2688 +128 (5.0%)
class 40: pages: 3 size: 2816->3072 +256 (9.9%)
class 41: pages: 2 size: 3072->3200 +128 (4.16%)
class 42: pages: 3 size: 3328->3456 +128 (3.84%)
class 44: pages: 3 size: 4608->4864 +256 (5.55%)
class 47: pages: 4 size: 6400->6528 +128 (2.0%)
class 48: pages: 5 size: 6656->6784 +128 (1.92%)
class 51: pages: 7 size: 8448->9472 +1024 (12.12%)
class 52: pages: 6 size: 8704->9728 +1024 (11.76%)
class 53: pages: 5 size: 9472->10240 +768 (8.10%)
class 54: pages: 4 size: 10496->10880 +384 (3.65%)
class 57: pages: 7 size: 14080->14336 +256 (1.81%)
class 59: pages: 9 size: 16640->18432 +1792 (10.76%)
class 60: pages: 7 size: 17664->19072 +1408 (7.97%)
class 62: pages: 8 size: 21248->21760 +512 (2.40%)
class 64: pages: 10 size: 24832->27264 +2432 (9.79%)
class 65: pages: 7 size: 28416->28672 +256 (0.90%)
name old time/op new time/op delta
BinaryTree17-12 2.59s ± 5% 2.52s ± 4% ~ (p=0.132 n=6+6)
Fannkuch11-12 2.13s ± 3% 2.17s ± 3% ~ (p=0.180 n=6+6)
FmtFprintfEmpty-12 47.0ns ± 3% 46.6ns ± 1% ~ (p=0.355 n=6+5)
FmtFprintfString-12 131ns ± 0% 131ns ± 1% ~ (p=0.476 n=4+6)
FmtFprintfInt-12 121ns ± 6% 122ns ± 2% ~ (p=0.511 n=6+6)
FmtFprintfIntInt-12 182ns ± 2% 186ns ± 1% +2.20% (p=0.015 n=6+6)
FmtFprintfPrefixedInt-12 184ns ± 5% 181ns ± 2% ~ (p=0.645 n=6+6)
FmtFprintfFloat-12 272ns ± 7% 265ns ± 1% ~ (p=1.000 n=6+5)
FmtManyArgs-12 783ns ± 2% 802ns ± 2% +2.38% (p=0.017 n=6+6)
GobDecode-12 7.04ms ± 4% 7.00ms ± 2% ~ (p=1.000 n=6+6)
GobEncode-12 6.36ms ± 6% 6.17ms ± 6% ~ (p=0.240 n=6+6)
Gzip-12 242ms ±14% 233ms ± 7% ~ (p=0.310 n=6+6)
Gunzip-12 36.6ms ±22% 36.0ms ± 9% ~ (p=0.841 n=5+5)
HTTPClientServer-12 93.1µs ±29% 88.0µs ±32% ~ (p=0.240 n=6+6)
JSONEncode-12 27.1ms ±39% 26.2ms ±35% ~ (p=0.589 n=6+6)
JSONDecode-12 71.7ms ±36% 71.5ms ±36% ~ (p=0.937 n=6+6)
Mandelbrot200-12 4.78ms ±10% 4.70ms ±16% ~ (p=0.394 n=6+6)
GoParse-12 4.86ms ±34% 4.95ms ±36% ~ (p=1.000 n=6+6)
RegexpMatchEasy0_32-12 110ns ±37% 110ns ±36% ~ (p=0.660 n=6+6)
RegexpMatchEasy0_1K-12 240ns ±38% 234ns ±47% ~ (p=0.554 n=6+6)
RegexpMatchEasy1_32-12 77.2ns ± 2% 77.2ns ±10% ~ (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12 337ns ± 5% 331ns ± 4% ~ (p=0.552 n=6+6)
RegexpMatchMedium_32-12 125ns ±13% 132ns ±26% ~ (p=0.561 n=6+6)
RegexpMatchMedium_1K-12 35.9µs ± 3% 36.1µs ± 5% ~ (p=0.818 n=6+6)
RegexpMatchHard_32-12 1.81µs ± 4% 1.82µs ± 5% ~ (p=0.452 n=5+5)
RegexpMatchHard_1K-12 52.4µs ± 2% 54.4µs ± 3% +3.84% (p=0.002 n=6+6)
Revcomp-12 401ms ± 2% 390ms ± 1% -2.82% (p=0.002 n=6+6)
Template-12 54.5ms ± 3% 54.6ms ± 1% ~ (p=0.589 n=6+6)
TimeParse-12 294ns ± 1% 298ns ± 2% ~ (p=0.160 n=6+6)
TimeFormat-12 323ns ± 4% 318ns ± 5% ~ (p=0.297 n=6+6)
name old speed new speed delta
GobDecode-12 109MB/s ± 4% 110MB/s ± 2% ~ (p=1.000 n=6+6)
GobEncode-12 121MB/s ± 6% 125MB/s ± 6% ~ (p=0.240 n=6+6)
Gzip-12 80.4MB/s ±12% 83.3MB/s ± 7% ~ (p=0.310 n=6+6)
Gunzip-12 495MB/s ±41% 541MB/s ± 9% ~ (p=0.931 n=6+5)
JSONEncode-12 80.7MB/s ±39% 82.8MB/s ±34% ~ (p=0.589 n=6+6)
JSONDecode-12 30.4MB/s ±40% 31.0MB/s ±37% ~ (p=0.937 n=6+6)
GoParse-12 13.2MB/s ±33% 13.2MB/s ±35% ~ (p=1.000 n=6+6)
RegexpMatchEasy0_32-12 321MB/s ±34% 326MB/s ±34% ~ (p=0.699 n=6+6)
RegexpMatchEasy0_1K-12 4.49GB/s ±31% 4.74GB/s ±37% ~ (p=0.589 n=6+6)
RegexpMatchEasy1_32-12 414MB/s ± 2% 415MB/s ± 9% ~ (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12 3.03GB/s ± 5% 3.09GB/s ± 4% ~ (p=0.699 n=6+6)
RegexpMatchMedium_32-12 7.99MB/s ±12% 7.68MB/s ±22% ~ (p=0.589 n=6+6)
RegexpMatchMedium_1K-12 28.5MB/s ± 3% 28.4MB/s ± 5% ~ (p=0.818 n=6+6)
RegexpMatchHard_32-12 17.7MB/s ± 4% 17.0MB/s ±15% ~ (p=0.351 n=5+6)
RegexpMatchHard_1K-12 19.6MB/s ± 2% 18.8MB/s ± 3% -3.67% (p=0.002 n=6+6)
Revcomp-12 634MB/s ± 2% 653MB/s ± 1% +2.89% (p=0.002 n=6+6)
Template-12 35.6MB/s ± 3% 35.5MB/s ± 1% ~ (p=0.615 n=6+6)
Change-Id: I465a47f74227f316e3abea231444f48c7a30ef85
Reviewed-on: https://go-review.googlesource.com/24493
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/msize.go')
-rw-r--r-- | src/runtime/msize.go | 61 |
1 files changed, 44 insertions, 17 deletions
diff --git a/src/runtime/msize.go b/src/runtime/msize.go index 18577b309b..00c1e9d340 100644 --- a/src/runtime/msize.go +++ b/src/runtime/msize.go @@ -46,21 +46,27 @@ package runtime // size divided by 128 (rounded up). The arrays are filled in // by InitSizes. -var class_to_size [_NumSizeClasses]int32 -var class_to_allocnpages [_NumSizeClasses]int32 +const ( + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 +) + +var class_to_size [_NumSizeClasses]uint32 +var class_to_allocnpages [_NumSizeClasses]uint32 var class_to_divmagic [_NumSizeClasses]divMagic -var size_to_class8 [1024/8 + 1]int8 -var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 +var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8 +var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8 -func sizeToClass(size int32) int32 { +func sizeToClass(size uint32) uint32 { if size > _MaxSmallSize { throw("invalid size") } - if size > 1024-8 { - return int32(size_to_class128[(size-1024+127)>>7]) + if size > smallSizeMax-8 { + return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]) } - return int32(size_to_class8[(size+7)>>3]) + return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) } func initSizes() { @@ -97,20 +103,41 @@ func initSizes() { // use just this size instead of having two // different sizes. if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { - class_to_size[sizeclass-1] = int32(size) + class_to_size[sizeclass-1] = uint32(size) continue } - class_to_allocnpages[sizeclass] = int32(npages) - class_to_size[sizeclass] = int32(size) + class_to_allocnpages[sizeclass] = uint32(npages) + class_to_size[sizeclass] = uint32(size) sizeclass++ } if sizeclass != _NumSizeClasses { print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") throw("bad NumSizeClasses") } + + // Increase object sizes if we can fit the same number of larger objects + // into the same number of pages. For example, we choose size 8448 above + // with 6 objects in 7 pages. But we can well use object size 9472, + // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). + // We need to preserve at least largeSizeDiv alignment otherwise + // sizeToClass won't work. + for i := 1; i < _NumSizeClasses; i++ { + npages := class_to_allocnpages[i] + psize := npages * _PageSize + size := class_to_size[i] + new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1) + if new_size > size { + class_to_size[i] = new_size + } + } + // Check maxObjsPerSpan => number of objects invariant. for i, size := range class_to_size { + if i != 0 && class_to_size[i-1] >= size { + throw("non-monotonic size classes") + } + if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan { throw("span contains too many objects") } @@ -122,18 +149,18 @@ func initSizes() { nextsize := 0 for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { - size_to_class8[nextsize/8] = int8(sizeclass) + size_to_class8[nextsize/8] = uint8(sizeclass) } if nextsize >= 1024 { for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { - size_to_class128[(nextsize-1024)/128] = int8(sizeclass) + size_to_class128[(nextsize-1024)/128] = uint8(sizeclass) } } } // Double-check SizeToClass. if false { - for n := int32(0); n < _MaxSmallSize; n++ { + for n := uint32(0); n < _MaxSmallSize; n++ { sizeclass := sizeToClass(n) if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") @@ -186,10 +213,10 @@ dump: // Returns size of the memory block that mallocgc will allocate if you ask for the size. func roundupsize(size uintptr) uintptr { if size < _MaxSmallSize { - if size <= 1024-8 { - return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) + if size <= smallSizeMax-8 { + return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]]) } else { - return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) + return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]]) } } if size+_PageSize < size { |