diff options
author | Michael T. Jones <mtj@google.com> | 2011-06-07 16:02:34 -0700 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2011-06-07 16:02:34 -0700 |
commit | d5c45c541d71af0ead44a46eca9e38272481d5bb (patch) | |
tree | 4d402c75d40de73829c14e46f86ea2f06a23b3b2 | |
parent | c5281d7ab98996a75eabfd2e6a4075986f93f29a (diff) | |
download | go-d5c45c541d71af0ead44a46eca9e38272481d5bb.tar.gz go-d5c45c541d71af0ead44a46eca9e38272481d5bb.zip |
big.nat: Improved speed of nat-to-string conversion
Three optimizations: First, special-case power of two bases
that partion a Word(), bases 2, 4, 16, and 256. These can
be moved directly from internal Word() storage to the output
without multiprecision operations. Next, same approach for
the other power-of-two bases, 8, 32, 64, and 128. These
don't fill a Word() evenly, so special handling is needed
for those cases where input spans the high-bits of one Word
and the low bis of the next one. Finally, implement the
general case for others bases in 2 <= base <= 256 using
superbases, the largest power of base representable in a
Word(). For base ten, this is 9 digits and a superbase of
10^9 for 32-bit Words and 19 digits and 10^19 for 64-bit
compiles. This way we do just 1/9th or 1/19th of the expensive
multiprecision divisions, unpacking superdigits using fast
native machine arithmetic. The resulting code runs 7x to
800x the speed of the previous approach, depending on the
length of the number to be converted--longer is relatively
faster.
Also, extended the tests and benchmarks for string to nat
(scan()) and nat to string (string()) functions. A further
enhancement awaits the next CL to make general cases about
7x faster for long cases.
R=gri
CC=golang-dev
https://golang.org/cl/4595041
-rwxr-xr-x | src/pkg/big/nat.go | 129 | ||||
-rwxr-xr-x | src/pkg/big/nat_test.go | 213 |
2 files changed, 324 insertions, 18 deletions
diff --git a/src/pkg/big/nat.go b/src/pkg/big/nat.go index fa09d65315..ea1903b166 100755 --- a/src/pkg/big/nat.go +++ b/src/pkg/big/nat.go @@ -615,6 +615,7 @@ func (x nat) bitLen() int { // MaxBase is the largest number base accepted for string conversions. const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1 + func hexValue(ch int) Word { d := MaxBase + 1 // illegal base switch { @@ -733,46 +734,137 @@ const ( uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" ) + +// decimalString returns a decimal representation of x. +// It calls x.string with the charset "0123456789". +func (x nat) decimalString() string { + return x.string(lowercaseDigits[0:10]) +} + + // string converts x to a string using digits from a charset; a digit with // value d is represented by charset[d]. The conversion base is determined // by len(charset), which must be >= 2. func (x nat) string(charset string) string { - base := len(charset) + b := Word(len(charset)) // special cases switch { - case base < 2: + case b < 2 || b > 256: panic("illegal base") case len(x) == 0: return string(charset[0]) } // allocate buffer for conversion - i := x.bitLen()/log2(Word(base)) + 1 // +1: round up + i := x.bitLen()/log2(b) + 1 // +1: round up s := make([]byte, i) - // don't destroy x + // special case: power of two bases can avoid divisions completely + if b == b&-b { + // shift is base-b digit size in bits + shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2 + m := len(x) + mask := Word(1)<<shift - 1 + w := x[0] + nbits := uint(_W) // number of unprocessed bits in w + + // convert less-significant words + for k := 0; k < m-1; k++ { + // convert full digits + for nbits >= shift { + i-- + s[i] = charset[w&mask] + w >>= shift + nbits -= shift + } + + // convert any partial leading digit and advance to next word + if nbits == 0 { + // no partial digit remaining, just advance + w = x[k+1] + nbits = _W + } else { + // partial digit in current (k) and next (k+1) word + w |= x[k+1] << nbits + i-- + s[i] = charset[w&mask] + + // advance + w = x[k+1] >> (shift - nbits) + nbits = _W - (shift - nbits) + } + } + + // convert digits of most-significant word (omit leading zeros) + for nbits >= 0 && w != 0 { + i-- + s[i] = charset[w&mask] + w >>= shift + nbits -= shift + } + + return string(s[i:]) + } + + // general case: extract groups of digits by multiprecision division + + // maximize ndigits where b**ndigits < 2^_W; bb (big base) is b**ndigits + bb := Word(1) + ndigits := 0 + for max := Word(_M / b); bb <= max; bb *= b { + ndigits++ + } + + // preserve x, create local copy for use in repeated divisions q := nat(nil).set(x) + var r Word // convert - for len(q) > 0 { - i-- - var r Word - q, r = q.divW(q, Word(base)) - s[i] = charset[r] + if b == 10 { // hard-coding for 10 here speeds this up by 1.25x + for len(q) > 0 { + // extract least significant, base bb "digit" + q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW + if len(q) == 0 { + // skip leading zeros in most-significant group of digits + for j := 0; j < ndigits && r != 0; j++ { + i-- + s[i] = charset[r%10] + r /= 10 + } + } else { + for j := 0; j < ndigits; j++ { + i-- + s[i] = charset[r%10] + r /= 10 + } + } + } + } else { + for len(q) > 0 { + // extract least significant group of digits + q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW + if len(q) == 0 { + // skip leading zeros in most-significant group of digits + for j := 0; j < ndigits && r != 0; j++ { + i-- + s[i] = charset[r%b] + r /= b + } + } else { + for j := 0; j < ndigits; j++ { + i-- + s[i] = charset[r%b] + r /= b + } + } + } } return string(s[i:]) } -// decimalString returns a decimal representation of x. -// It calls x.string with the charset "0123456789". -func (x nat) decimalString() string { - return x.string(lowercaseDigits[0:10]) -} - - const deBruijn32 = 0x077CB531 var deBruijn32Lookup = []byte{ @@ -789,6 +881,7 @@ var deBruijn64Lookup = []byte{ 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, } + // trailingZeroBits returns the number of consecutive zero bits on the right // side of the given Word. // See Knuth, volume 4, section 7.3.1 @@ -960,7 +1053,9 @@ func (z nat) xor(x, y nat) nat { // greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2) -func greaterThan(x1, x2, y1, y2 Word) bool { return x1 > y1 || x1 == y1 && x2 > y2 } +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} // modW returns x % d. diff --git a/src/pkg/big/nat_test.go b/src/pkg/big/nat_test.go index 50ea469be0..fd93592ddc 100755 --- a/src/pkg/big/nat_test.go +++ b/src/pkg/big/nat_test.go @@ -5,6 +5,7 @@ package big import ( + "fmt" "os" "strings" "testing" @@ -171,6 +172,36 @@ func BenchmarkMul(b *testing.B) { } +func toString(x nat, charset string) string { + base := len(charset) + + // special cases + switch { + case base < 2: + panic("illegal base") + case len(x) == 0: + return string(charset[0]) + } + + // allocate buffer for conversion + i := x.bitLen()/log2(Word(base)) + 1 // +1: round up + s := make([]byte, i) + + // don't destroy x + q := nat(nil).set(x) + + // convert + for len(q) > 0 { + i-- + var r Word + q, r = q.divW(q, Word(base)) + s[i] = charset[r] + } + + return string(s[i:]) +} + + var strTests = []struct { x nat // nat value to be converted c string // conversion charset @@ -360,6 +391,187 @@ func BenchmarkScanPi(b *testing.B) { } +const ( + // 314**271 + // base 2: 2249 digits + // base 8: 751 digits + // base 10: 678 digits + // base 16: 563 digits + shortBase = 314 + shortExponent = 271 + + // 3141**2178 + // base 2: 31577 digits + // base 8: 10527 digits + // base 10: 9507 digits + // base 16: 7895 digits + mediumBase = 3141 + mediumExponent = 2718 + + // 3141**2178 + // base 2: 406078 digits + // base 8: 135360 digits + // base 10: 122243 digits + // base 16: 101521 digits + longBase = 31415 + longExponent = 27182 +) + + +func BenchmarkScanShort2(b *testing.B) { + ScanHelper(b, 2, shortBase, shortExponent) +} + + +func BenchmarkScanShort8(b *testing.B) { + ScanHelper(b, 8, shortBase, shortExponent) +} + + +func BenchmarkScanSort10(b *testing.B) { + ScanHelper(b, 10, shortBase, shortExponent) +} + + +func BenchmarkScanShort16(b *testing.B) { + ScanHelper(b, 16, shortBase, shortExponent) +} + + +func BenchmarkScanMedium2(b *testing.B) { + ScanHelper(b, 2, mediumBase, mediumExponent) +} + + +func BenchmarkScanMedium8(b *testing.B) { + ScanHelper(b, 8, mediumBase, mediumExponent) +} + + +func BenchmarkScanMedium10(b *testing.B) { + ScanHelper(b, 10, mediumBase, mediumExponent) +} + + +func BenchmarkScanMedium16(b *testing.B) { + ScanHelper(b, 16, mediumBase, mediumExponent) +} + + +func BenchmarkScanLong2(b *testing.B) { + ScanHelper(b, 2, longBase, longExponent) +} + + +func BenchmarkScanLong8(b *testing.B) { + ScanHelper(b, 8, longBase, longExponent) +} + + +func BenchmarkScanLong10(b *testing.B) { + ScanHelper(b, 10, longBase, longExponent) +} + + +func BenchmarkScanLong16(b *testing.B) { + ScanHelper(b, 16, longBase, longExponent) +} + + +func ScanHelper(b *testing.B, base int, xv, yv Word) { + b.StopTimer() + var x, y, z nat + x = x.setWord(xv) + y = y.setWord(yv) + z = z.expNN(x, y, nil) + + var s string + s = z.string(lowercaseDigits[0:base]) + if t := toString(z, lowercaseDigits[0:base]); t != s { + panic(fmt.Sprintf("scanning: got %s; want %s", s, t)) + } + b.StartTimer() + + for i := 0; i < b.N; i++ { + x.scan(strings.NewReader(s), base) + } +} + + +func BenchmarkStringShort2(b *testing.B) { + StringHelper(b, 2, shortBase, shortExponent) +} + + +func BenchmarkStringShort8(b *testing.B) { + StringHelper(b, 8, shortBase, shortExponent) +} + + +func BenchmarkStringShort10(b *testing.B) { + StringHelper(b, 10, shortBase, shortExponent) +} + + +func BenchmarkStringShort16(b *testing.B) { + StringHelper(b, 16, shortBase, shortExponent) +} + + +func BenchmarkStringMedium2(b *testing.B) { + StringHelper(b, 2, mediumBase, mediumExponent) +} + + +func BenchmarkStringMedium8(b *testing.B) { + StringHelper(b, 8, mediumBase, mediumExponent) +} + + +func BenchmarkStringMedium10(b *testing.B) { + StringHelper(b, 10, mediumBase, mediumExponent) +} + + +func BenchmarkStringMedium16(b *testing.B) { + StringHelper(b, 16, mediumBase, mediumExponent) +} + + +func BenchmarkStringLong2(b *testing.B) { + StringHelper(b, 2, longBase, longExponent) +} + + +func BenchmarkStringLong8(b *testing.B) { + StringHelper(b, 8, longBase, longExponent) +} + + +func BenchmarkStringLong10(b *testing.B) { + StringHelper(b, 10, longBase, longExponent) +} + + +func BenchmarkStringLong16(b *testing.B) { + StringHelper(b, 16, longBase, longExponent) +} + + +func StringHelper(b *testing.B, base int, xv, yv Word) { + b.StopTimer() + var x, y, z nat + x = x.setWord(xv) + y = y.setWord(yv) + z = z.expNN(x, y, nil) + b.StartTimer() + + for i := 0; i < b.N; i++ { + z.string(lowercaseDigits[0:base]) + } +} + + func TestLeadingZeros(t *testing.T) { var x Word = _B >> 1 for i := 0; i <= _W; i++ { @@ -479,7 +691,6 @@ func TestTrailingZeroBits(t *testing.T) { } } - var expNNTests = []struct { x, y, m string out string |