aboutsummaryrefslogtreecommitdiff
path: root/src/strconv
diff options
context:
space:
mode:
authorNigel Tao <nigeltao@golang.org>2020-10-22 23:43:11 +1100
committerNigel Tao <nigeltao@golang.org>2020-10-22 23:20:22 +0000
commitad36f871512ea54a9044e256b2743b8346b725dd (patch)
tree41f89e464e1b5c1661ff3be6d622473b124fae52 /src/strconv
parentf1aa0b081e9a75b7757a8e08378aba0326911916 (diff)
downloadgo-ad36f871512ea54a9044e256b2743b8346b725dd.tar.gz
go-ad36f871512ea54a9044e256b2743b8346b725dd.zip
strconv: increase the Eisel-Lemire exp10 range
This grows the exp10 range for which the Eisel-Lemire algorithm applies from [-307, +288] to [-348, +347], roughly equivalent to the existing powersOfTen table in extfloat.go (which uses a different algorithm). name old time/op new time/op delta Atof64Decimal-4 48.4ns ± 1% 48.7ns ± 3% ~ (p=0.698 n=5+5) Atof64Float-4 57.9ns ± 1% 58.1ns ± 2% ~ (p=0.873 n=5+5) Atof64FloatExp-4 71.8ns ± 2% 72.2ns ± 2% ~ (p=0.730 n=5+5) Atof64Big-4 165ns ± 1% 164ns ± 1% ~ (p=0.635 n=5+5) Atof64RandomBits-4 165ns ± 1% 165ns ± 6% ~ (p=0.143 n=5+5) Atof64RandomFloats-4 147ns ± 2% 147ns ± 1% ~ (p=0.857 n=5+5) Change-Id: Idf7dc5297db6db2bd9e0bd4cb0e55e021916fa43 Reviewed-on: https://go-review.googlesource.com/c/go/+/264139 Reviewed-by: Robert Griesemer <gri@golang.org> Trust: Robert Griesemer <gri@golang.org> Trust: Nigel Tao <nigeltao@golang.org>
Diffstat (limited to 'src/strconv')
-rw-r--r--src/strconv/eisel_lemire.go124
1 files changed, 119 insertions, 5 deletions
diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go
index 5cd22a7e37..e548270688 100644
--- a/src/strconv/eisel_lemire.go
+++ b/src/strconv/eisel_lemire.go
@@ -27,14 +27,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
// https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post.
// Exp10 Range.
- const exp10Min, exp10Max = -307, +288
if man == 0 {
if neg {
f = math.Float64frombits(0x80000000_00000000) // Negative zero.
}
return f, true
}
- if exp10 < exp10Min || exp10Max < exp10 {
+ if exp10 < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < exp10 {
return 0, false
}
@@ -44,11 +43,11 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
retExp2 := uint64(217706*exp10>>16+1087) - uint64(clz)
// Multiplication.
- xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][1])
+ xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1])
// Wider Approximation.
if xHi&0x1FF == 0x1FF && xLo+man < man {
- yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][0])
+ yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][0])
mergedHi, mergedLo := xHi, xLo+yHi
if mergedLo < xLo {
mergedHi++
@@ -76,6 +75,14 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
retMantissa >>= 1
retExp2 += 1
}
+ // retExp2 is a uint64. Zero or underflow means that we're in subnormal
+ // float64 space. 0x7FF or above means that we're in Inf/NaN float64 space.
+ //
+ // The if condition is equivalent to (but has fewer branches than):
+ // if retExp2 <= 0 || retExp2 >= 0x7FF {
+ if retExp2-1 >= 0x7FF-1 {
+ return 0, false
+ }
retBits := retExp2<<52 | retMantissa&0x000FFFFF_FFFFFFFF
if neg {
retBits |= 0x80000000_00000000
@@ -83,6 +90,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
return math.Float64frombits(retBits), true
}
+// detailedPowersOfTen{Min,Max}Exp10 is the power of 10 represented by the
+// first and last rows of detailedPowersOfTen. Both bounds are inclusive.
+const (
+ detailedPowersOfTenMinExp10 = -348
+ detailedPowersOfTenMaxExp10 = +347
+)
+
// detailedPowersOfTen contains 128-bit mantissa approximations (rounded down)
// to the powers of 10. For example:
//
@@ -94,7 +108,48 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) {
//
// The table was generated by
// https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/script/print-mpb-powers-of-10.go
-var detailedPowersOfTen = [596][2]uint64{
+var detailedPowersOfTen = [...][2]uint64{
+ {0x1732C869CD60E453, 0xFA8FD5A0081C0288}, // 1e-348
+ {0x0E7FBD42205C8EB4, 0x9C99E58405118195}, // 1e-347
+ {0x521FAC92A873B261, 0xC3C05EE50655E1FA}, // 1e-346
+ {0xE6A797B752909EF9, 0xF4B0769E47EB5A78}, // 1e-345
+ {0x9028BED2939A635C, 0x98EE4A22ECF3188B}, // 1e-344
+ {0x7432EE873880FC33, 0xBF29DCABA82FDEAE}, // 1e-343
+ {0x113FAA2906A13B3F, 0xEEF453D6923BD65A}, // 1e-342
+ {0x4AC7CA59A424C507, 0x9558B4661B6565F8}, // 1e-341
+ {0x5D79BCF00D2DF649, 0xBAAEE17FA23EBF76}, // 1e-340
+ {0xF4D82C2C107973DC, 0xE95A99DF8ACE6F53}, // 1e-339
+ {0x79071B9B8A4BE869, 0x91D8A02BB6C10594}, // 1e-338
+ {0x9748E2826CDEE284, 0xB64EC836A47146F9}, // 1e-337
+ {0xFD1B1B2308169B25, 0xE3E27A444D8D98B7}, // 1e-336
+ {0xFE30F0F5E50E20F7, 0x8E6D8C6AB0787F72}, // 1e-335
+ {0xBDBD2D335E51A935, 0xB208EF855C969F4F}, // 1e-334
+ {0xAD2C788035E61382, 0xDE8B2B66B3BC4723}, // 1e-333
+ {0x4C3BCB5021AFCC31, 0x8B16FB203055AC76}, // 1e-332
+ {0xDF4ABE242A1BBF3D, 0xADDCB9E83C6B1793}, // 1e-331
+ {0xD71D6DAD34A2AF0D, 0xD953E8624B85DD78}, // 1e-330
+ {0x8672648C40E5AD68, 0x87D4713D6F33AA6B}, // 1e-329
+ {0x680EFDAF511F18C2, 0xA9C98D8CCB009506}, // 1e-328
+ {0x0212BD1B2566DEF2, 0xD43BF0EFFDC0BA48}, // 1e-327
+ {0x014BB630F7604B57, 0x84A57695FE98746D}, // 1e-326
+ {0x419EA3BD35385E2D, 0xA5CED43B7E3E9188}, // 1e-325
+ {0x52064CAC828675B9, 0xCF42894A5DCE35EA}, // 1e-324
+ {0x7343EFEBD1940993, 0x818995CE7AA0E1B2}, // 1e-323
+ {0x1014EBE6C5F90BF8, 0xA1EBFB4219491A1F}, // 1e-322
+ {0xD41A26E077774EF6, 0xCA66FA129F9B60A6}, // 1e-321
+ {0x8920B098955522B4, 0xFD00B897478238D0}, // 1e-320
+ {0x55B46E5F5D5535B0, 0x9E20735E8CB16382}, // 1e-319
+ {0xEB2189F734AA831D, 0xC5A890362FDDBC62}, // 1e-318
+ {0xA5E9EC7501D523E4, 0xF712B443BBD52B7B}, // 1e-317
+ {0x47B233C92125366E, 0x9A6BB0AA55653B2D}, // 1e-316
+ {0x999EC0BB696E840A, 0xC1069CD4EABE89F8}, // 1e-315
+ {0xC00670EA43CA250D, 0xF148440A256E2C76}, // 1e-314
+ {0x380406926A5E5728, 0x96CD2A865764DBCA}, // 1e-313
+ {0xC605083704F5ECF2, 0xBC807527ED3E12BC}, // 1e-312
+ {0xF7864A44C633682E, 0xEBA09271E88D976B}, // 1e-311
+ {0x7AB3EE6AFBE0211D, 0x93445B8731587EA3}, // 1e-310
+ {0x5960EA05BAD82964, 0xB8157268FDAE9E4C}, // 1e-309
+ {0x6FB92487298E33BD, 0xE61ACF033D1A45DF}, // 1e-308
{0xA5D3B6D479F8E056, 0x8FD0C16206306BAB}, // 1e-307
{0x8F48A4899877186C, 0xB3C4F1BA87BC8696}, // 1e-306
{0x331ACDABFE94DE87, 0xE0B62E2929ABA83C}, // 1e-305
@@ -691,4 +746,63 @@ var detailedPowersOfTen = [596][2]uint64{
{0x49ED8EABCCCC485D, 0x867F59A9D4BED6C0}, // 1e286
{0x5C68F256BFFF5A74, 0xA81F301449EE8C70}, // 1e287
{0x73832EEC6FFF3111, 0xD226FC195C6A2F8C}, // 1e288
+ {0xC831FD53C5FF7EAB, 0x83585D8FD9C25DB7}, // 1e289
+ {0xBA3E7CA8B77F5E55, 0xA42E74F3D032F525}, // 1e290
+ {0x28CE1BD2E55F35EB, 0xCD3A1230C43FB26F}, // 1e291
+ {0x7980D163CF5B81B3, 0x80444B5E7AA7CF85}, // 1e292
+ {0xD7E105BCC332621F, 0xA0555E361951C366}, // 1e293
+ {0x8DD9472BF3FEFAA7, 0xC86AB5C39FA63440}, // 1e294
+ {0xB14F98F6F0FEB951, 0xFA856334878FC150}, // 1e295
+ {0x6ED1BF9A569F33D3, 0x9C935E00D4B9D8D2}, // 1e296
+ {0x0A862F80EC4700C8, 0xC3B8358109E84F07}, // 1e297
+ {0xCD27BB612758C0FA, 0xF4A642E14C6262C8}, // 1e298
+ {0x8038D51CB897789C, 0x98E7E9CCCFBD7DBD}, // 1e299
+ {0xE0470A63E6BD56C3, 0xBF21E44003ACDD2C}, // 1e300
+ {0x1858CCFCE06CAC74, 0xEEEA5D5004981478}, // 1e301
+ {0x0F37801E0C43EBC8, 0x95527A5202DF0CCB}, // 1e302
+ {0xD30560258F54E6BA, 0xBAA718E68396CFFD}, // 1e303
+ {0x47C6B82EF32A2069, 0xE950DF20247C83FD}, // 1e304
+ {0x4CDC331D57FA5441, 0x91D28B7416CDD27E}, // 1e305
+ {0xE0133FE4ADF8E952, 0xB6472E511C81471D}, // 1e306
+ {0x58180FDDD97723A6, 0xE3D8F9E563A198E5}, // 1e307
+ {0x570F09EAA7EA7648, 0x8E679C2F5E44FF8F}, // 1e308
+ {0x2CD2CC6551E513DA, 0xB201833B35D63F73}, // 1e309
+ {0xF8077F7EA65E58D1, 0xDE81E40A034BCF4F}, // 1e310
+ {0xFB04AFAF27FAF782, 0x8B112E86420F6191}, // 1e311
+ {0x79C5DB9AF1F9B563, 0xADD57A27D29339F6}, // 1e312
+ {0x18375281AE7822BC, 0xD94AD8B1C7380874}, // 1e313
+ {0x8F2293910D0B15B5, 0x87CEC76F1C830548}, // 1e314
+ {0xB2EB3875504DDB22, 0xA9C2794AE3A3C69A}, // 1e315
+ {0x5FA60692A46151EB, 0xD433179D9C8CB841}, // 1e316
+ {0xDBC7C41BA6BCD333, 0x849FEEC281D7F328}, // 1e317
+ {0x12B9B522906C0800, 0xA5C7EA73224DEFF3}, // 1e318
+ {0xD768226B34870A00, 0xCF39E50FEAE16BEF}, // 1e319
+ {0xE6A1158300D46640, 0x81842F29F2CCE375}, // 1e320
+ {0x60495AE3C1097FD0, 0xA1E53AF46F801C53}, // 1e321
+ {0x385BB19CB14BDFC4, 0xCA5E89B18B602368}, // 1e322
+ {0x46729E03DD9ED7B5, 0xFCF62C1DEE382C42}, // 1e323
+ {0x6C07A2C26A8346D1, 0x9E19DB92B4E31BA9}, // 1e324
+ {0xC7098B7305241885, 0xC5A05277621BE293}, // 1e325
+ {0xB8CBEE4FC66D1EA7, 0xF70867153AA2DB38}, // 1e326
+ {0x737F74F1DC043328, 0x9A65406D44A5C903}, // 1e327
+ {0x505F522E53053FF2, 0xC0FE908895CF3B44}, // 1e328
+ {0x647726B9E7C68FEF, 0xF13E34AABB430A15}, // 1e329
+ {0x5ECA783430DC19F5, 0x96C6E0EAB509E64D}, // 1e330
+ {0xB67D16413D132072, 0xBC789925624C5FE0}, // 1e331
+ {0xE41C5BD18C57E88F, 0xEB96BF6EBADF77D8}, // 1e332
+ {0x8E91B962F7B6F159, 0x933E37A534CBAAE7}, // 1e333
+ {0x723627BBB5A4ADB0, 0xB80DC58E81FE95A1}, // 1e334
+ {0xCEC3B1AAA30DD91C, 0xE61136F2227E3B09}, // 1e335
+ {0x213A4F0AA5E8A7B1, 0x8FCAC257558EE4E6}, // 1e336
+ {0xA988E2CD4F62D19D, 0xB3BD72ED2AF29E1F}, // 1e337
+ {0x93EB1B80A33B8605, 0xE0ACCFA875AF45A7}, // 1e338
+ {0xBC72F130660533C3, 0x8C6C01C9498D8B88}, // 1e339
+ {0xEB8FAD7C7F8680B4, 0xAF87023B9BF0EE6A}, // 1e340
+ {0xA67398DB9F6820E1, 0xDB68C2CA82ED2A05}, // 1e341
+ {0x88083F8943A1148C, 0x892179BE91D43A43}, // 1e342
+ {0x6A0A4F6B948959B0, 0xAB69D82E364948D4}, // 1e343
+ {0x848CE34679ABB01C, 0xD6444E39C3DB9B09}, // 1e344
+ {0xF2D80E0C0C0B4E11, 0x85EAB0E41A6940E5}, // 1e345
+ {0x6F8E118F0F0E2195, 0xA7655D1D2103911F}, // 1e346
+ {0x4B7195F2D2D1A9FB, 0xD13EB46469447567}, // 1e347
}