aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/msize.go
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2016-06-27 12:23:39 +0200
committerDmitry Vyukov <dvyukov@google.com>2016-08-19 21:24:28 +0000
commit14e59511661303ab1406f7c21ee27e58bcd0750e (patch)
treeafe916cde99047bd7e258f5535a1b64c1042d495 /src/runtime/msize.go
parentab9137dd24e10a9f884475413437cc31e48dbdf7 (diff)
downloadgo-14e59511661303ab1406f7c21ee27e58bcd0750e.tar.gz
go-14e59511661303ab1406f7c21ee27e58bcd0750e.zip
runtime: increase malloc size classes
When we calculate class sizes, in some cases we discard considerable amounts of memory without an apparent reason. For example, we choose size 8448 with 6 objects in 7 pages. But we can well use object size 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%). Increase class sizes to the max value that leads to the same page count/number of objects. Full list of affected size classes: class 36: pages: 2 size: 1664->1792 +128 (7.69%) class 39: pages: 1 size: 2560->2688 +128 (5.0%) class 40: pages: 3 size: 2816->3072 +256 (9.9%) class 41: pages: 2 size: 3072->3200 +128 (4.16%) class 42: pages: 3 size: 3328->3456 +128 (3.84%) class 44: pages: 3 size: 4608->4864 +256 (5.55%) class 47: pages: 4 size: 6400->6528 +128 (2.0%) class 48: pages: 5 size: 6656->6784 +128 (1.92%) class 51: pages: 7 size: 8448->9472 +1024 (12.12%) class 52: pages: 6 size: 8704->9728 +1024 (11.76%) class 53: pages: 5 size: 9472->10240 +768 (8.10%) class 54: pages: 4 size: 10496->10880 +384 (3.65%) class 57: pages: 7 size: 14080->14336 +256 (1.81%) class 59: pages: 9 size: 16640->18432 +1792 (10.76%) class 60: pages: 7 size: 17664->19072 +1408 (7.97%) class 62: pages: 8 size: 21248->21760 +512 (2.40%) class 64: pages: 10 size: 24832->27264 +2432 (9.79%) class 65: pages: 7 size: 28416->28672 +256 (0.90%) name old time/op new time/op delta BinaryTree17-12 2.59s ± 5% 2.52s ± 4% ~ (p=0.132 n=6+6) Fannkuch11-12 2.13s ± 3% 2.17s ± 3% ~ (p=0.180 n=6+6) FmtFprintfEmpty-12 47.0ns ± 3% 46.6ns ± 1% ~ (p=0.355 n=6+5) FmtFprintfString-12 131ns ± 0% 131ns ± 1% ~ (p=0.476 n=4+6) FmtFprintfInt-12 121ns ± 6% 122ns ± 2% ~ (p=0.511 n=6+6) FmtFprintfIntInt-12 182ns ± 2% 186ns ± 1% +2.20% (p=0.015 n=6+6) FmtFprintfPrefixedInt-12 184ns ± 5% 181ns ± 2% ~ (p=0.645 n=6+6) FmtFprintfFloat-12 272ns ± 7% 265ns ± 1% ~ (p=1.000 n=6+5) FmtManyArgs-12 783ns ± 2% 802ns ± 2% +2.38% (p=0.017 n=6+6) GobDecode-12 7.04ms ± 4% 7.00ms ± 2% ~ (p=1.000 n=6+6) GobEncode-12 6.36ms ± 6% 6.17ms ± 6% ~ (p=0.240 n=6+6) Gzip-12 242ms ±14% 233ms ± 7% ~ (p=0.310 n=6+6) Gunzip-12 36.6ms ±22% 36.0ms ± 9% ~ (p=0.841 n=5+5) HTTPClientServer-12 93.1µs ±29% 88.0µs ±32% ~ (p=0.240 n=6+6) JSONEncode-12 27.1ms ±39% 26.2ms ±35% ~ (p=0.589 n=6+6) JSONDecode-12 71.7ms ±36% 71.5ms ±36% ~ (p=0.937 n=6+6) Mandelbrot200-12 4.78ms ±10% 4.70ms ±16% ~ (p=0.394 n=6+6) GoParse-12 4.86ms ±34% 4.95ms ±36% ~ (p=1.000 n=6+6) RegexpMatchEasy0_32-12 110ns ±37% 110ns ±36% ~ (p=0.660 n=6+6) RegexpMatchEasy0_1K-12 240ns ±38% 234ns ±47% ~ (p=0.554 n=6+6) RegexpMatchEasy1_32-12 77.2ns ± 2% 77.2ns ±10% ~ (p=0.699 n=6+6) RegexpMatchEasy1_1K-12 337ns ± 5% 331ns ± 4% ~ (p=0.552 n=6+6) RegexpMatchMedium_32-12 125ns ±13% 132ns ±26% ~ (p=0.561 n=6+6) RegexpMatchMedium_1K-12 35.9µs ± 3% 36.1µs ± 5% ~ (p=0.818 n=6+6) RegexpMatchHard_32-12 1.81µs ± 4% 1.82µs ± 5% ~ (p=0.452 n=5+5) RegexpMatchHard_1K-12 52.4µs ± 2% 54.4µs ± 3% +3.84% (p=0.002 n=6+6) Revcomp-12 401ms ± 2% 390ms ± 1% -2.82% (p=0.002 n=6+6) Template-12 54.5ms ± 3% 54.6ms ± 1% ~ (p=0.589 n=6+6) TimeParse-12 294ns ± 1% 298ns ± 2% ~ (p=0.160 n=6+6) TimeFormat-12 323ns ± 4% 318ns ± 5% ~ (p=0.297 n=6+6) name old speed new speed delta GobDecode-12 109MB/s ± 4% 110MB/s ± 2% ~ (p=1.000 n=6+6) GobEncode-12 121MB/s ± 6% 125MB/s ± 6% ~ (p=0.240 n=6+6) Gzip-12 80.4MB/s ±12% 83.3MB/s ± 7% ~ (p=0.310 n=6+6) Gunzip-12 495MB/s ±41% 541MB/s ± 9% ~ (p=0.931 n=6+5) JSONEncode-12 80.7MB/s ±39% 82.8MB/s ±34% ~ (p=0.589 n=6+6) JSONDecode-12 30.4MB/s ±40% 31.0MB/s ±37% ~ (p=0.937 n=6+6) GoParse-12 13.2MB/s ±33% 13.2MB/s ±35% ~ (p=1.000 n=6+6) RegexpMatchEasy0_32-12 321MB/s ±34% 326MB/s ±34% ~ (p=0.699 n=6+6) RegexpMatchEasy0_1K-12 4.49GB/s ±31% 4.74GB/s ±37% ~ (p=0.589 n=6+6) RegexpMatchEasy1_32-12 414MB/s ± 2% 415MB/s ± 9% ~ (p=0.699 n=6+6) RegexpMatchEasy1_1K-12 3.03GB/s ± 5% 3.09GB/s ± 4% ~ (p=0.699 n=6+6) RegexpMatchMedium_32-12 7.99MB/s ±12% 7.68MB/s ±22% ~ (p=0.589 n=6+6) RegexpMatchMedium_1K-12 28.5MB/s ± 3% 28.4MB/s ± 5% ~ (p=0.818 n=6+6) RegexpMatchHard_32-12 17.7MB/s ± 4% 17.0MB/s ±15% ~ (p=0.351 n=5+6) RegexpMatchHard_1K-12 19.6MB/s ± 2% 18.8MB/s ± 3% -3.67% (p=0.002 n=6+6) Revcomp-12 634MB/s ± 2% 653MB/s ± 1% +2.89% (p=0.002 n=6+6) Template-12 35.6MB/s ± 3% 35.5MB/s ± 1% ~ (p=0.615 n=6+6) Change-Id: I465a47f74227f316e3abea231444f48c7a30ef85 Reviewed-on: https://go-review.googlesource.com/24493 Run-TryBot: Dmitry Vyukov <dvyukov@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/msize.go')
-rw-r--r--src/runtime/msize.go61
1 files changed, 44 insertions, 17 deletions
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
index 18577b309b..00c1e9d340 100644
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -46,21 +46,27 @@ package runtime
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
-var class_to_size [_NumSizeClasses]int32
-var class_to_allocnpages [_NumSizeClasses]int32
+const (
+ smallSizeDiv = 8
+ smallSizeMax = 1024
+ largeSizeDiv = 128
+)
+
+var class_to_size [_NumSizeClasses]uint32
+var class_to_allocnpages [_NumSizeClasses]uint32
var class_to_divmagic [_NumSizeClasses]divMagic
-var size_to_class8 [1024/8 + 1]int8
-var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
+var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
-func sizeToClass(size int32) int32 {
+func sizeToClass(size uint32) uint32 {
if size > _MaxSmallSize {
throw("invalid size")
}
- if size > 1024-8 {
- return int32(size_to_class128[(size-1024+127)>>7])
+ if size > smallSizeMax-8 {
+ return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv])
}
- return int32(size_to_class8[(size+7)>>3])
+ return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
}
func initSizes() {
@@ -97,20 +103,41 @@ func initSizes() {
// use just this size instead of having two
// different sizes.
if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
- class_to_size[sizeclass-1] = int32(size)
+ class_to_size[sizeclass-1] = uint32(size)
continue
}
- class_to_allocnpages[sizeclass] = int32(npages)
- class_to_size[sizeclass] = int32(size)
+ class_to_allocnpages[sizeclass] = uint32(npages)
+ class_to_size[sizeclass] = uint32(size)
sizeclass++
}
if sizeclass != _NumSizeClasses {
print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("bad NumSizeClasses")
}
+
+ // Increase object sizes if we can fit the same number of larger objects
+ // into the same number of pages. For example, we choose size 8448 above
+ // with 6 objects in 7 pages. But we can well use object size 9472,
+ // which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
+ // We need to preserve at least largeSizeDiv alignment otherwise
+ // sizeToClass won't work.
+ for i := 1; i < _NumSizeClasses; i++ {
+ npages := class_to_allocnpages[i]
+ psize := npages * _PageSize
+ size := class_to_size[i]
+ new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
+ if new_size > size {
+ class_to_size[i] = new_size
+ }
+ }
+
// Check maxObjsPerSpan => number of objects invariant.
for i, size := range class_to_size {
+ if i != 0 && class_to_size[i-1] >= size {
+ throw("non-monotonic size classes")
+ }
+
if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
throw("span contains too many objects")
}
@@ -122,18 +149,18 @@ func initSizes() {
nextsize := 0
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
- size_to_class8[nextsize/8] = int8(sizeclass)
+ size_to_class8[nextsize/8] = uint8(sizeclass)
}
if nextsize >= 1024 {
for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
- size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
+ size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
}
}
}
// Double-check SizeToClass.
if false {
- for n := int32(0); n < _MaxSmallSize; n++ {
+ for n := uint32(0); n < _MaxSmallSize; n++ {
sizeclass := sizeToClass(n)
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
@@ -186,10 +213,10 @@ dump:
// Returns size of the memory block that mallocgc will allocate if you ask for the size.
func roundupsize(size uintptr) uintptr {
if size < _MaxSmallSize {
- if size <= 1024-8 {
- return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
+ if size <= smallSizeMax-8 {
+ return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]])
} else {
- return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
+ return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]])
}
}
if size+_PageSize < size {