runtime: compute size classes statically

No point in computing this info on startup. Compute it at build time.
This lets us spend more time computing & checking the size classes.

Improve the div magic for rounding to the start of an object. We can
now use 32-bit multiplies & shifts, which should help 32-bit platforms.

The static data is <1KB.

The actual size classes are not changed by this CL.

Change-Id: I6450cec7d1b2b4ad31fd3f945f504ed2ec6570e7
Reviewed-on: https://go-review.googlesource.com/32219
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
author: Keith Randall <khr@golang.org> 2016-10-26 21:25:56 -0700
committer: Keith Randall <khr@golang.org> 2016-10-30 03:48:49 +0000
commit: 7ba36f4adb43355ef4b870d64d23f9988b1279ea (patch)
tree: 6cba94b528c17a72497235cdb3ffb2b3a8495a33 /src/runtime/msize.go
parent: 753caecc7e576e46a329b71c49dbf0ea9bfbd867 (diff)
download: go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.tar.gz
go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.zip
1 files changed, 5 insertions, 247 deletions
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
index 00c1e9d340..438c987513 100644
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -5,60 +5,22 @@
 // Malloc small size classes.
 //
 // See malloc.go for overview.
-//
-// The size classes are chosen so that rounding an allocation
-// request up to the next size class wastes at most 12.5% (1.125x).
-//
-// Each size class has its own page count that gets allocated
-// and chopped up when new objects of the size class are needed.
-// That page count is chosen so that chopping up the run of
-// pages into objects of the given size wastes at most 12.5% (1.125x)
-// of the memory. It is not necessary that the cutoff here be
-// the same as above.
-//
-// The two sources of waste multiply, so the worst possible case
-// for the above constraints would be that allocations of some
-// size might have a 26.6% (1.266x) overhead.
-// In practice, only one of the wastes comes into play for a
-// given size (sizes < 512 waste mainly on the round-up,
-// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
+// See also mksizeclasses.go for how we decide what size classes to use.
 
 package runtime
 
-// Size classes. Computed and initialized by InitSizes.
-//
-// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+// sizeToClass(0 <= n <= MaxSmallSize) returns the size class,
 //	1 <= sizeclass < NumSizeClasses, for n.
 //	Size class 0 is reserved to mean "not small".
 //
-// class_to_size[i] = largest size in class i
-// class_to_allocnpages[i] = number of pages to allocate when
-//	making new objects in class i
-
-// The SizeToClass lookup is implemented using two arrays,
+// The sizeToClass lookup is implemented using two arrays,
 // one mapping sizes <= 1024 to their class and one mapping
 // sizes >= 1024 and <= MaxSmallSize to their class.
 // All objects are 8-aligned, so the first array is indexed by
 // the size divided by 8 (rounded up).  Objects >= 1024 bytes
 // are 128-aligned, so the second array is indexed by the
-// size divided by 128 (rounded up).  The arrays are filled in
-// by InitSizes.
-
-const (
-	smallSizeDiv = 8
-	smallSizeMax = 1024
-	largeSizeDiv = 128
-)
-
-var class_to_size [_NumSizeClasses]uint32
-var class_to_allocnpages [_NumSizeClasses]uint32
-var class_to_divmagic [_NumSizeClasses]divMagic
-
-var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
-var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
-
+// size divided by 128 (rounded up).  The arrays are constants
+// in sizeclass.go generated by mksizeclasses.go.
 func sizeToClass(size uint32) uint32 {
 	if size > _MaxSmallSize {
 		throw("invalid size")
@@ -69,147 +31,6 @@ func sizeToClass(size uint32) uint32 {
 	return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
 }
 
-func initSizes() {
-	// Initialize the runtime·class_to_size table (and choose class sizes in the process).
-	class_to_size[0] = 0
-	sizeclass := 1 // 0 means no class
-	align := 8
-	for size := align; size <= _MaxSmallSize; size += align {
-		if size&(size-1) == 0 { // bump alignment once in a while
-			if size >= 2048 {
-				align = 256
-			} else if size >= 128 {
-				align = size / 8
-			} else if size >= 16 {
-				align = 16 // required for x86 SSE instructions, if we want to use them
-			}
-		}
-		if align&(align-1) != 0 {
-			throw("incorrect alignment")
-		}
-
-		// Make the allocnpages big enough that
-		// the leftover is less than 1/8 of the total,
-		// so wasted space is at most 12.5%.
-		allocsize := _PageSize
-		for allocsize%size > allocsize/8 {
-			allocsize += _PageSize
-		}
-		npages := allocsize >> _PageShift
-
-		// If the previous sizeclass chose the same
-		// allocation size and fit the same number of
-		// objects into the page, we might as well
-		// use just this size instead of having two
-		// different sizes.
-		if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
-			class_to_size[sizeclass-1] = uint32(size)
-			continue
-		}
-
-		class_to_allocnpages[sizeclass] = uint32(npages)
-		class_to_size[sizeclass] = uint32(size)
-		sizeclass++
-	}
-	if sizeclass != _NumSizeClasses {
-		print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
-		throw("bad NumSizeClasses")
-	}
-
-	// Increase object sizes if we can fit the same number of larger objects
-	// into the same number of pages. For example, we choose size 8448 above
-	// with 6 objects in 7 pages. But we can well use object size 9472,
-	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
-	// We need to preserve at least largeSizeDiv alignment otherwise
-	// sizeToClass won't work.
-	for i := 1; i < _NumSizeClasses; i++ {
-		npages := class_to_allocnpages[i]
-		psize := npages * _PageSize
-		size := class_to_size[i]
-		new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
-		if new_size > size {
-			class_to_size[i] = new_size
-		}
-	}
-
-	// Check maxObjsPerSpan => number of objects invariant.
-	for i, size := range class_to_size {
-		if i != 0 && class_to_size[i-1] >= size {
-			throw("non-monotonic size classes")
-		}
-
-		if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
-			throw("span contains too many objects")
-		}
-		if size == 0 && i != 0 {
-			throw("size is 0 but class is not 0")
-		}
-	}
-	// Initialize the size_to_class tables.
-	nextsize := 0
-	for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
-		for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
-			size_to_class8[nextsize/8] = uint8(sizeclass)
-		}
-		if nextsize >= 1024 {
-			for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
-				size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
-			}
-		}
-	}
-
-	// Double-check SizeToClass.
-	if false {
-		for n := uint32(0); n < _MaxSmallSize; n++ {
-			sizeclass := sizeToClass(n)
-			if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
-				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
-				print("incorrect SizeToClass\n")
-				goto dump
-			}
-			if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
-				print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
-				print("SizeToClass too big\n")
-				goto dump
-			}
-		}
-	}
-
-	testdefersizes()
-
-	// Copy out for statistics table.
-	for i := 0; i < len(class_to_size); i++ {
-		memstats.by_size[i].size = uint32(class_to_size[i])
-	}
-
-	for i := 1; i < len(class_to_size); i++ {
-		class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
-	}
-
-	return
-
-dump:
-	if true {
-		print("runtime: NumSizeClasses=", _NumSizeClasses, "\n")
-		print("runtime·class_to_size:")
-		for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
-			print(" ", class_to_size[sizeclass], "")
-		}
-		print("\n\n")
-		print("runtime: size_to_class8:")
-		for i := 0; i < len(size_to_class8); i++ {
-			print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
-		}
-		print("\n")
-		print("runtime: size_to_class128:")
-		for i := 0; i < len(size_to_class128); i++ {
-			print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
-		}
-		print("\n")
-	}
-	throw("InitSizes failed")
-}
-
 // Returns size of the memory block that mallocgc will allocate if you ask for the size.
 func roundupsize(size uintptr) uintptr {
 	if size < _MaxSmallSize {
@@ -224,66 +45,3 @@ func roundupsize(size uintptr) uintptr {
 	}
 	return round(size, _PageSize)
 }
-
-// divMagic holds magic constants to implement division
-// by a particular constant as a shift, multiply, and shift.
-// That is, given
-//	m = computeMagic(d)
-// then
-//	n/d == ((n>>m.shift) * m.mul) >> m.shift2
-//
-// The magic computation picks m such that
-//	d = d₁*d₂
-//	d₂= 2^m.shift
-//	m.mul = ⌈2^m.shift2 / d₁⌉
-//
-// The magic computation here is tailored for malloc block sizes
-// and does not handle arbitrary d correctly. Malloc block sizes d are
-// always even, so the first shift implements the factors of 2 in d
-// and then the mul and second shift implement the odd factor
-// that remains. Because the first shift divides n by at least 2 (actually 8)
-// before the multiply gets involved, the huge corner cases that
-// require additional adjustment are impossible, so the usual
-// fixup is not needed.
-//
-// For more details see Hacker's Delight, Chapter 10, and
-// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
-// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
-type divMagic struct {
-	shift    uint8
-	mul      uint32
-	shift2   uint8
-	baseMask uintptr
-}
-
-func computeDivMagic(d uint32) divMagic {
-	var m divMagic
-
-	// If the size is a power of two, heapBitsForObject can divide even faster by masking.
-	// Compute this mask.
-	if d&(d-1) == 0 {
-		// It is a power of 2 (assuming dinptr != 1)
-		m.baseMask = ^(uintptr(d) - 1)
-	} else {
-		m.baseMask = 0
-	}
-
-	// Compute pre-shift by factoring power of 2 out of d.
-	for d&1 == 0 {
-		m.shift++
-		d >>= 1
-	}
-
-	// Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int.
-	// This is always a good enough approximation.
-	// We could use smaller k for some divisors but there's no point.
-	k := uint8(63)
-	d64 := uint64(d)
-	for ((1<<k)+d64-1)/d64 >= 1<<32 {
-		k--
-	}
-	m.mul = uint32(((1 << k) + d64 - 1) / d64) //  ⌈2^k / d⌉
-	m.shift2 = k
-
-	return m
-}
author	Keith Randall <khr@golang.org>	2016-10-26 21:25:56 -0700
committer	Keith Randall <khr@golang.org>	2016-10-30 03:48:49 +0000
commit	7ba36f4adb43355ef4b870d64d23f9988b1279ea (patch)
tree	6cba94b528c17a72497235cdb3ffb2b3a8495a33 /src/runtime/msize.go
parent	753caecc7e576e46a329b71c49dbf0ea9bfbd867 (diff)
download	go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.tar.gz go-7ba36f4adb43355ef4b870d64d23f9988b1279ea.zip