big.nat: Improved speed of nat-to-string conversion

Three optimizations: First, special-case power of two bases that partion a Word(), bases 2, 4, 16, and 256. These can be moved directly from internal Word() storage to the output without multiprecision operations. Next, same approach for the other power-of-two bases, 8, 32, 64, and 128. These don't fill a Word() evenly, so special handling is needed for those cases where input spans the high-bits of one Word and the low bis of the next one. Finally, implement the general case for others bases in 2 <= base <= 256 using superbases, the largest power of base representable in a Word(). For base ten, this is 9 digits and a superbase of 10^9 for 32-bit Words and 19 digits and 10^19 for 64-bit compiles. This way we do just 1/9th or 1/19th of the expensive multiprecision divisions, unpacking superdigits using fast native machine arithmetic. The resulting code runs 7x to 800x the speed of the previous approach, depending on the length of the number to be converted--longer is relatively faster. Also, extended the tests and benchmarks for string to nat (scan()) and nat to string (string()) functions. A further enhancement awaits the next CL to make general cases about 7x faster for long cases. R=gri CC=golang-dev https://golang.org/cl/4595041
author: Michael T. Jones <mtj@google.com> 2011-06-07 16:02:34 -0700
committer: Robert Griesemer <gri@golang.org> 2011-06-07 16:02:34 -0700
commit: d5c45c541d71af0ead44a46eca9e38272481d5bb (patch)
tree: 4d402c75d40de73829c14e46f86ea2f06a23b3b2
parent: c5281d7ab98996a75eabfd2e6a4075986f93f29a (diff)
download: go-d5c45c541d71af0ead44a46eca9e38272481d5bb.tar.gz
go-d5c45c541d71af0ead44a46eca9e38272481d5bb.zip
2 files changed, 324 insertions, 18 deletions
diff --git a/src/pkg/big/nat.go b/src/pkg/big/nat.go
index fa09d65315..ea1903b166 100755
--- a/src/pkg/big/nat.go
+++ b/src/pkg/big/nat.go
@@ -615,6 +615,7 @@ func (x nat) bitLen() int {
 // MaxBase is the largest number base accepted for string conversions.
 const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
 
+
 func hexValue(ch int) Word {
 	d := MaxBase + 1 // illegal base
 	switch {
@@ -733,46 +734,137 @@ const (
 	uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 )
 
+
+// decimalString returns a decimal representation of x.
+// It calls x.string with the charset "0123456789".
+func (x nat) decimalString() string {
+	return x.string(lowercaseDigits[0:10])
+}
+
+
 // string converts x to a string using digits from a charset; a digit with
 // value d is represented by charset[d]. The conversion base is determined
 // by len(charset), which must be >= 2.
 func (x nat) string(charset string) string {
-	base := len(charset)
+	b := Word(len(charset))
 
 	// special cases
 	switch {
-	case base < 2:
+	case b < 2 || b > 256:
 		panic("illegal base")
 	case len(x) == 0:
 		return string(charset[0])
 	}
 
 	// allocate buffer for conversion
-	i := x.bitLen()/log2(Word(base)) + 1 // +1: round up
+	i := x.bitLen()/log2(b) + 1 // +1: round up
 	s := make([]byte, i)
 
-	// don't destroy x
+	// special case: power of two bases can avoid divisions completely
+	if b == b&-b {
+		// shift is base-b digit size in bits
+		shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
+		m := len(x)
+		mask := Word(1)<<shift - 1
+		w := x[0]
+		nbits := uint(_W) // number of unprocessed bits in w
+
+		// convert less-significant words
+		for k := 0; k < m-1; k++ {
+			// convert full digits
+			for nbits >= shift {
+				i--
+				s[i] = charset[w&mask]
+				w >>= shift
+				nbits -= shift
+			}
+
+			// convert any partial leading digit and advance to next word
+			if nbits == 0 {
+				// no partial digit remaining, just advance
+				w = x[k+1]
+				nbits = _W
+			} else {
+				// partial digit in current (k) and next (k+1) word
+				w |= x[k+1] << nbits
+				i--
+				s[i] = charset[w&mask]
+
+				// advance
+				w = x[k+1] >> (shift - nbits)
+				nbits = _W - (shift - nbits)
+			}
+		}
+
+		// convert digits of most-significant word (omit leading zeros)
+		for nbits >= 0 && w != 0 {
+			i--
+			s[i] = charset[w&mask]
+			w >>= shift
+			nbits -= shift
+		}
+
+		return string(s[i:])
+	}
+
+	// general case: extract groups of digits by multiprecision division
+
+	// maximize ndigits where b**ndigits < 2^_W; bb (big base) is b**ndigits
+	bb := Word(1)
+	ndigits := 0
+	for max := Word(_M / b); bb <= max; bb *= b {
+		ndigits++
+	}
+
+	// preserve x, create local copy for use in repeated divisions
 	q := nat(nil).set(x)
+	var r Word
 
 	// convert
-	for len(q) > 0 {
-		i--
-		var r Word
-		q, r = q.divW(q, Word(base))
-		s[i] = charset[r]
+	if b == 10 { // hard-coding for 10 here speeds this up by 1.25x
+		for len(q) > 0 {
+			// extract least significant, base bb "digit"
+			q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
+			if len(q) == 0 {
+				// skip leading zeros in most-significant group of digits
+				for j := 0; j < ndigits && r != 0; j++ {
+					i--
+					s[i] = charset[r%10]
+					r /= 10
+				}
+			} else {
+				for j := 0; j < ndigits; j++ {
+					i--
+					s[i] = charset[r%10]
+					r /= 10
+				}
+			}
+		}
+	} else {
+		for len(q) > 0 {
+			// extract least significant group of digits
+			q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
+			if len(q) == 0 {
+				// skip leading zeros in most-significant group of digits
+				for j := 0; j < ndigits && r != 0; j++ {
+					i--
+					s[i] = charset[r%b]
+					r /= b
+				}
+			} else {
+				for j := 0; j < ndigits; j++ {
+					i--
+					s[i] = charset[r%b]
+					r /= b
+				}
+			}
+		}
 	}
 
 	return string(s[i:])
 }
 
 
-// decimalString returns a decimal representation of x.
-// It calls x.string with the charset "0123456789".
-func (x nat) decimalString() string {
-	return x.string(lowercaseDigits[0:10])
-}
-
-
 const deBruijn32 = 0x077CB531
 
 var deBruijn32Lookup = []byte{
@@ -789,6 +881,7 @@ var deBruijn64Lookup = []byte{
 	54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
 }
 
+
 // trailingZeroBits returns the number of consecutive zero bits on the right
 // side of the given Word.
 // See Knuth, volume 4, section 7.3.1
@@ -960,7 +1053,9 @@ func (z nat) xor(x, y nat) nat {
 
 
 // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
-func greaterThan(x1, x2, y1, y2 Word) bool { return x1 > y1 || x1 == y1 && x2 > y2 }
+func greaterThan(x1, x2, y1, y2 Word) bool {
+	return x1 > y1 || x1 == y1 && x2 > y2
+}
 
 
 // modW returns x % d.
diff --git a/src/pkg/big/nat_test.go b/src/pkg/big/nat_test.go
index 50ea469be0..fd93592ddc 100755
--- a/src/pkg/big/nat_test.go
+++ b/src/pkg/big/nat_test.go
@@ -5,6 +5,7 @@
 package big
 
 import (
+	"fmt"
 	"os"
 	"strings"
 	"testing"
@@ -171,6 +172,36 @@ func BenchmarkMul(b *testing.B) {
 }
 
 
+func toString(x nat, charset string) string {
+	base := len(charset)
+
+	// special cases
+	switch {
+	case base < 2:
+		panic("illegal base")
+	case len(x) == 0:
+		return string(charset[0])
+	}
+
+	// allocate buffer for conversion
+	i := x.bitLen()/log2(Word(base)) + 1 // +1: round up
+	s := make([]byte, i)
+
+	// don't destroy x
+	q := nat(nil).set(x)
+
+	// convert
+	for len(q) > 0 {
+		i--
+		var r Word
+		q, r = q.divW(q, Word(base))
+		s[i] = charset[r]
+	}
+
+	return string(s[i:])
+}
+
+
 var strTests = []struct {
 	x nat    // nat value to be converted
 	c string // conversion charset
@@ -360,6 +391,187 @@ func BenchmarkScanPi(b *testing.B) {
 }
 
 
+const (
+	// 314**271
+	// base  2: 2249 digits
+	// base  8:  751 digits
+	// base 10:  678 digits
+	// base 16:  563 digits
+	shortBase     = 314
+	shortExponent = 271
+
+	// 3141**2178
+	// base  2: 31577 digits
+	// base  8: 10527 digits
+	// base 10:  9507 digits
+	// base 16:  7895 digits
+	mediumBase     = 3141
+	mediumExponent = 2718
+
+	// 3141**2178
+	// base  2: 406078 digits
+	// base  8: 135360 digits
+	// base 10: 122243 digits
+	// base 16: 101521 digits
+	longBase     = 31415
+	longExponent = 27182
+)
+
+
+func BenchmarkScanShort2(b *testing.B) {
+	ScanHelper(b, 2, shortBase, shortExponent)
+}
+
+
+func BenchmarkScanShort8(b *testing.B) {
+	ScanHelper(b, 8, shortBase, shortExponent)
+}
+
+
+func BenchmarkScanSort10(b *testing.B) {
+	ScanHelper(b, 10, shortBase, shortExponent)
+}
+
+
+func BenchmarkScanShort16(b *testing.B) {
+	ScanHelper(b, 16, shortBase, shortExponent)
+}
+
+
+func BenchmarkScanMedium2(b *testing.B) {
+	ScanHelper(b, 2, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkScanMedium8(b *testing.B) {
+	ScanHelper(b, 8, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkScanMedium10(b *testing.B) {
+	ScanHelper(b, 10, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkScanMedium16(b *testing.B) {
+	ScanHelper(b, 16, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkScanLong2(b *testing.B) {
+	ScanHelper(b, 2, longBase, longExponent)
+}
+
+
+func BenchmarkScanLong8(b *testing.B) {
+	ScanHelper(b, 8, longBase, longExponent)
+}
+
+
+func BenchmarkScanLong10(b *testing.B) {
+	ScanHelper(b, 10, longBase, longExponent)
+}
+
+
+func BenchmarkScanLong16(b *testing.B) {
+	ScanHelper(b, 16, longBase, longExponent)
+}
+
+
+func ScanHelper(b *testing.B, base int, xv, yv Word) {
+	b.StopTimer()
+	var x, y, z nat
+	x = x.setWord(xv)
+	y = y.setWord(yv)
+	z = z.expNN(x, y, nil)
+
+	var s string
+	s = z.string(lowercaseDigits[0:base])
+	if t := toString(z, lowercaseDigits[0:base]); t != s {
+		panic(fmt.Sprintf("scanning: got %s; want %s", s, t))
+	}
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		x.scan(strings.NewReader(s), base)
+	}
+}
+
+
+func BenchmarkStringShort2(b *testing.B) {
+	StringHelper(b, 2, shortBase, shortExponent)
+}
+
+
+func BenchmarkStringShort8(b *testing.B) {
+	StringHelper(b, 8, shortBase, shortExponent)
+}
+
+
+func BenchmarkStringShort10(b *testing.B) {
+	StringHelper(b, 10, shortBase, shortExponent)
+}
+
+
+func BenchmarkStringShort16(b *testing.B) {
+	StringHelper(b, 16, shortBase, shortExponent)
+}
+
+
+func BenchmarkStringMedium2(b *testing.B) {
+	StringHelper(b, 2, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkStringMedium8(b *testing.B) {
+	StringHelper(b, 8, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkStringMedium10(b *testing.B) {
+	StringHelper(b, 10, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkStringMedium16(b *testing.B) {
+	StringHelper(b, 16, mediumBase, mediumExponent)
+}
+
+
+func BenchmarkStringLong2(b *testing.B) {
+	StringHelper(b, 2, longBase, longExponent)
+}
+
+
+func BenchmarkStringLong8(b *testing.B) {
+	StringHelper(b, 8, longBase, longExponent)
+}
+
+
+func BenchmarkStringLong10(b *testing.B) {
+	StringHelper(b, 10, longBase, longExponent)
+}
+
+
+func BenchmarkStringLong16(b *testing.B) {
+	StringHelper(b, 16, longBase, longExponent)
+}
+
+
+func StringHelper(b *testing.B, base int, xv, yv Word) {
+	b.StopTimer()
+	var x, y, z nat
+	x = x.setWord(xv)
+	y = y.setWord(yv)
+	z = z.expNN(x, y, nil)
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		z.string(lowercaseDigits[0:base])
+	}
+}
+
+
 func TestLeadingZeros(t *testing.T) {
 	var x Word = _B >> 1
 	for i := 0; i <= _W; i++ {
@@ -479,7 +691,6 @@ func TestTrailingZeroBits(t *testing.T) {
 	}
 }
 
-
 var expNNTests = []struct {
 	x, y, m string
 	out     string
author	Michael T. Jones <mtj@google.com>	2011-06-07 16:02:34 -0700
committer	Robert Griesemer <gri@golang.org>	2011-06-07 16:02:34 -0700
commit	d5c45c541d71af0ead44a46eca9e38272481d5bb (patch)
tree	4d402c75d40de73829c14e46f86ea2f06a23b3b2
parent	c5281d7ab98996a75eabfd2e6a4075986f93f29a (diff)
download	go-d5c45c541d71af0ead44a46eca9e38272481d5bb.tar.gz go-d5c45c541d71af0ead44a46eca9e38272481d5bb.zip