diff options
author | Keith Randall <khr@golang.org> | 2016-10-26 21:25:56 -0700 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2016-10-30 03:48:49 +0000 |
commit | 7ba36f4adb43355ef4b870d64d23f9988b1279ea (patch) | |
tree | 6cba94b528c17a72497235cdb3ffb2b3a8495a33 /src/runtime/msize.go | |
parent | 753caecc7e576e46a329b71c49dbf0ea9bfbd867 (diff) | |
download | go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.tar.gz go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.zip |
runtime: compute size classes statically
No point in computing this info on startup.
Compute it at build time.
This lets us spend more time computing & checking the size classes.
Improve the div magic for rounding to the start of an object.
We can now use 32-bit multiplies & shifts, which should help
32-bit platforms.
The static data is <1KB.
The actual size classes are not changed by this CL.
Change-Id: I6450cec7d1b2b4ad31fd3f945f504ed2ec6570e7
Reviewed-on: https://go-review.googlesource.com/32219
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/msize.go')
-rw-r--r-- | src/runtime/msize.go | 252 |
1 files changed, 5 insertions, 247 deletions
diff --git a/src/runtime/msize.go b/src/runtime/msize.go index 00c1e9d340..438c987513 100644 --- a/src/runtime/msize.go +++ b/src/runtime/msize.go @@ -5,60 +5,22 @@ // Malloc small size classes. // // See malloc.go for overview. -// -// The size classes are chosen so that rounding an allocation -// request up to the next size class wastes at most 12.5% (1.125x). -// -// Each size class has its own page count that gets allocated -// and chopped up when new objects of the size class are needed. -// That page count is chosen so that chopping up the run of -// pages into objects of the given size wastes at most 12.5% (1.125x) -// of the memory. It is not necessary that the cutoff here be -// the same as above. -// -// The two sources of waste multiply, so the worst possible case -// for the above constraints would be that allocations of some -// size might have a 26.6% (1.266x) overhead. -// In practice, only one of the wastes comes into play for a -// given size (sizes < 512 waste mainly on the round-up, -// sizes > 512 waste mainly on the page chopping). -// -// TODO(rsc): Compute max waste for any given size. +// See also mksizeclasses.go for how we decide what size classes to use. package runtime -// Size classes. Computed and initialized by InitSizes. -// -// SizeToClass(0 <= n <= MaxSmallSize) returns the size class, +// sizeToClass(0 <= n <= MaxSmallSize) returns the size class, // 1 <= sizeclass < NumSizeClasses, for n. // Size class 0 is reserved to mean "not small". // -// class_to_size[i] = largest size in class i -// class_to_allocnpages[i] = number of pages to allocate when -// making new objects in class i - -// The SizeToClass lookup is implemented using two arrays, +// The sizeToClass lookup is implemented using two arrays, // one mapping sizes <= 1024 to their class and one mapping // sizes >= 1024 and <= MaxSmallSize to their class. // All objects are 8-aligned, so the first array is indexed by // the size divided by 8 (rounded up). 
Objects >= 1024 bytes // are 128-aligned, so the second array is indexed by the -// size divided by 128 (rounded up). The arrays are filled in -// by InitSizes. - -const ( - smallSizeDiv = 8 - smallSizeMax = 1024 - largeSizeDiv = 128 -) - -var class_to_size [_NumSizeClasses]uint32 -var class_to_allocnpages [_NumSizeClasses]uint32 -var class_to_divmagic [_NumSizeClasses]divMagic - -var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8 -var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8 - +// size divided by 128 (rounded up). The arrays are constants +// in sizeclasses.go generated by mksizeclasses.go. func sizeToClass(size uint32) uint32 { if size > _MaxSmallSize { throw("invalid size") @@ -69,147 +31,6 @@ func sizeToClass(size uint32) uint32 { return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]) } -func initSizes() { - // Initialize the runtime·class_to_size table (and choose class sizes in the process). - class_to_size[0] = 0 - sizeclass := 1 // 0 means no class - align := 8 - for size := align; size <= _MaxSmallSize; size += align { - if size&(size-1) == 0 { // bump alignment once in a while - if size >= 2048 { - align = 256 - } else if size >= 128 { - align = size / 8 - } else if size >= 16 { - align = 16 // required for x86 SSE instructions, if we want to use them - } - } - if align&(align-1) != 0 { - throw("incorrect alignment") - } - - // Make the allocnpages big enough that - // the leftover is less than 1/8 of the total, - // so wasted space is at most 12.5%. - allocsize := _PageSize - for allocsize%size > allocsize/8 { - allocsize += _PageSize - } - npages := allocsize >> _PageShift - - // If the previous sizeclass chose the same - // allocation size and fit the same number of - // objects into the page, we might as well - // use just this size instead of having two - // different sizes. 
- if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { - class_to_size[sizeclass-1] = uint32(size) - continue - } - - class_to_allocnpages[sizeclass] = uint32(npages) - class_to_size[sizeclass] = uint32(size) - sizeclass++ - } - if sizeclass != _NumSizeClasses { - print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") - throw("bad NumSizeClasses") - } - - // Increase object sizes if we can fit the same number of larger objects - // into the same number of pages. For example, we choose size 8448 above - // with 6 objects in 7 pages. But we can well use object size 9472, - // which is also 6 objects in 7 pages but +1024 bytes (+12.12%). - // We need to preserve at least largeSizeDiv alignment otherwise - // sizeToClass won't work. - for i := 1; i < _NumSizeClasses; i++ { - npages := class_to_allocnpages[i] - psize := npages * _PageSize - size := class_to_size[i] - new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1) - if new_size > size { - class_to_size[i] = new_size - } - } - - // Check maxObjsPerSpan => number of objects invariant. - for i, size := range class_to_size { - if i != 0 && class_to_size[i-1] >= size { - throw("non-monotonic size classes") - } - - if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan { - throw("span contains too many objects") - } - if size == 0 && i != 0 { - throw("size is 0 but class is not 0") - } - } - // Initialize the size_to_class tables. - nextsize := 0 - for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { - for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { - size_to_class8[nextsize/8] = uint8(sizeclass) - } - if nextsize >= 1024 { - for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { - size_to_class128[(nextsize-1024)/128] = uint8(sizeclass) - } - } - } - - // Double-check SizeToClass. 
- if false { - for n := uint32(0); n < _MaxSmallSize; n++ { - sizeclass := sizeToClass(n) - if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { - print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") - print("incorrect SizeToClass\n") - goto dump - } - if sizeclass > 1 && class_to_size[sizeclass-1] >= n { - print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n") - print("SizeToClass too big\n") - goto dump - } - } - } - - testdefersizes() - - // Copy out for statistics table. - for i := 0; i < len(class_to_size); i++ { - memstats.by_size[i].size = uint32(class_to_size[i]) - } - - for i := 1; i < len(class_to_size); i++ { - class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) - } - - return - -dump: - if true { - print("runtime: NumSizeClasses=", _NumSizeClasses, "\n") - print("runtime·class_to_size:") - for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { - print(" ", class_to_size[sizeclass], "") - } - print("\n\n") - print("runtime: size_to_class8:") - for i := 0; i < len(size_to_class8); i++ { - print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") - } - print("\n") - print("runtime: size_to_class128:") - for i := 0; i < len(size_to_class128); i++ { - print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") - } - print("\n") - } - throw("InitSizes failed") -} - // Returns size of the memory block that mallocgc will allocate if you ask for the size. func roundupsize(size uintptr) uintptr { if size < _MaxSmallSize { @@ -224,66 +45,3 @@ func roundupsize(size uintptr) uintptr { } return round(size, _PageSize) } - -// divMagic holds magic constants to implement division -// by a particular constant as a shift, multiply, and shift. 
-// That is, given -// m = computeMagic(d) -// then -// n/d == ((n>>m.shift) * m.mul) >> m.shift2 -// -// The magic computation picks m such that -// d = d₁*d₂ -// d₂= 2^m.shift -// m.mul = ⌈2^m.shift2 / d₁⌉ -// -// The magic computation here is tailored for malloc block sizes -// and does not handle arbitrary d correctly. Malloc block sizes d are -// always even, so the first shift implements the factors of 2 in d -// and then the mul and second shift implement the odd factor -// that remains. Because the first shift divides n by at least 2 (actually 8) -// before the multiply gets involved, the huge corner cases that -// require additional adjustment are impossible, so the usual -// fixup is not needed. -// -// For more details see Hacker's Delight, Chapter 10, and -// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html -// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html -type divMagic struct { - shift uint8 - mul uint32 - shift2 uint8 - baseMask uintptr -} - -func computeDivMagic(d uint32) divMagic { - var m divMagic - - // If the size is a power of two, heapBitsForObject can divide even faster by masking. - // Compute this mask. - if d&(d-1) == 0 { - // It is a power of 2 (assuming dinptr != 1) - m.baseMask = ^(uintptr(d) - 1) - } else { - m.baseMask = 0 - } - - // Compute pre-shift by factoring power of 2 out of d. - for d&1 == 0 { - m.shift++ - d >>= 1 - } - - // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int. - // This is always a good enough approximation. - // We could use smaller k for some divisors but there's no point. - k := uint8(63) - d64 := uint64(d) - for ((1<<k)+d64-1)/d64 >= 1<<32 { - k-- - } - m.mul = uint32(((1 << k) + d64 - 1) / d64) // ⌈2^k / d⌉ - m.shift2 = k - - return m -} |