diff options
author | Nigel Tao <nigeltao@golang.org> | 2020-10-22 23:43:11 +1100 |
---|---|---|
committer | Nigel Tao <nigeltao@golang.org> | 2020-10-22 23:20:22 +0000 |
commit | ad36f871512ea54a9044e256b2743b8346b725dd (patch) | |
tree | 41f89e464e1b5c1661ff3be6d622473b124fae52 /src/strconv | |
parent | f1aa0b081e9a75b7757a8e08378aba0326911916 (diff) | |
download | go-ad36f871512ea54a9044e256b2743b8346b725dd.tar.gz go-ad36f871512ea54a9044e256b2743b8346b725dd.zip |
strconv: increase the Eisel-Lemire exp10 range
This grows the exp10 range for which the Eisel-Lemire algorithm applies
from [-307, +288] to [-348, +347], roughly equivalent to the existing
powersOfTen table in extfloat.go (which uses a different algorithm).
name old time/op new time/op delta
Atof64Decimal-4 48.4ns ± 1% 48.7ns ± 3% ~ (p=0.698 n=5+5)
Atof64Float-4 57.9ns ± 1% 58.1ns ± 2% ~ (p=0.873 n=5+5)
Atof64FloatExp-4 71.8ns ± 2% 72.2ns ± 2% ~ (p=0.730 n=5+5)
Atof64Big-4 165ns ± 1% 164ns ± 1% ~ (p=0.635 n=5+5)
Atof64RandomBits-4 165ns ± 1% 165ns ± 6% ~ (p=0.143 n=5+5)
Atof64RandomFloats-4 147ns ± 2% 147ns ± 1% ~ (p=0.857 n=5+5)
Change-Id: Idf7dc5297db6db2bd9e0bd4cb0e55e021916fa43
Reviewed-on: https://go-review.googlesource.com/c/go/+/264139
Reviewed-by: Robert Griesemer <gri@golang.org>
Trust: Robert Griesemer <gri@golang.org>
Trust: Nigel Tao <nigeltao@golang.org>
Diffstat (limited to 'src/strconv')
-rw-r--r-- | src/strconv/eisel_lemire.go | 124 |
1 files changed, 119 insertions, 5 deletions
diff --git a/src/strconv/eisel_lemire.go b/src/strconv/eisel_lemire.go index 5cd22a7e37..e548270688 100644 --- a/src/strconv/eisel_lemire.go +++ b/src/strconv/eisel_lemire.go @@ -27,14 +27,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { // https://nigeltao.github.io/blog/2020/eisel-lemire.html blog post. // Exp10 Range. - const exp10Min, exp10Max = -307, +288 if man == 0 { if neg { f = math.Float64frombits(0x80000000_00000000) // Negative zero. } return f, true } - if exp10 < exp10Min || exp10Max < exp10 { + if exp10 < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < exp10 { return 0, false } @@ -44,11 +43,11 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { retExp2 := uint64(217706*exp10>>16+1087) - uint64(clz) // Multiplication. - xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][1]) + xHi, xLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][1]) // Wider Approximation. if xHi&0x1FF == 0x1FF && xLo+man < man { - yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-exp10Min][0]) + yHi, yLo := bits.Mul64(man, detailedPowersOfTen[exp10-detailedPowersOfTenMinExp10][0]) mergedHi, mergedLo := xHi, xLo+yHi if mergedLo < xLo { mergedHi++ @@ -76,6 +75,14 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { retMantissa >>= 1 retExp2 += 1 } + // retExp2 is a uint64. Zero or underflow means that we're in subnormal + // float64 space. 0x7FF or above means that we're in Inf/NaN float64 space. + // + // The if condition is equivalent to (but has fewer branches than): + // if retExp2 <= 0 || retExp2 >= 0x7FF { + if retExp2-1 >= 0x7FF-1 { + return 0, false + } retBits := retExp2<<52 | retMantissa&0x000FFFFF_FFFFFFFF if neg { retBits |= 0x80000000_00000000 @@ -83,6 +90,13 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { return math.Float64frombits(retBits), true } +// detailedPowersOfTen{Min,Max}Exp10 is the power of 10 represented by the +// first and last rows of detailedPowersOfTen. Both bounds are inclusive. +const ( + detailedPowersOfTenMinExp10 = -348 + detailedPowersOfTenMaxExp10 = +347 +) + // detailedPowersOfTen contains 128-bit mantissa approximations (rounded down) // to the powers of 10. For example: // @@ -94,7 +108,48 @@ func eiselLemire(man uint64, exp10 int, neg bool) (f float64, ok bool) { // // The table was generated by // https://github.com/google/wuffs/blob/ba3818cb6b473a2ed0b38ecfc07dbbd3a97e8ae7/script/print-mpb-powers-of-10.go -var detailedPowersOfTen = [596][2]uint64{ +var detailedPowersOfTen = [...][2]uint64{ + {0x1732C869CD60E453, 0xFA8FD5A0081C0288}, // 1e-348 + {0x0E7FBD42205C8EB4, 0x9C99E58405118195}, // 1e-347 + {0x521FAC92A873B261, 0xC3C05EE50655E1FA}, // 1e-346 + {0xE6A797B752909EF9, 0xF4B0769E47EB5A78}, // 1e-345 + {0x9028BED2939A635C, 0x98EE4A22ECF3188B}, // 1e-344 + {0x7432EE873880FC33, 0xBF29DCABA82FDEAE}, // 1e-343 + {0x113FAA2906A13B3F, 0xEEF453D6923BD65A}, // 1e-342 + {0x4AC7CA59A424C507, 0x9558B4661B6565F8}, // 1e-341 + {0x5D79BCF00D2DF649, 0xBAAEE17FA23EBF76}, // 1e-340 + {0xF4D82C2C107973DC, 0xE95A99DF8ACE6F53}, // 1e-339 + {0x79071B9B8A4BE869, 0x91D8A02BB6C10594}, // 1e-338 + {0x9748E2826CDEE284, 0xB64EC836A47146F9}, // 1e-337 + {0xFD1B1B2308169B25, 0xE3E27A444D8D98B7}, // 1e-336 + {0xFE30F0F5E50E20F7, 0x8E6D8C6AB0787F72}, // 1e-335 + {0xBDBD2D335E51A935, 0xB208EF855C969F4F}, // 1e-334 + {0xAD2C788035E61382, 0xDE8B2B66B3BC4723}, // 1e-333 + {0x4C3BCB5021AFCC31, 0x8B16FB203055AC76}, // 1e-332 + {0xDF4ABE242A1BBF3D, 0xADDCB9E83C6B1793}, // 1e-331 + {0xD71D6DAD34A2AF0D, 0xD953E8624B85DD78}, // 1e-330 + {0x8672648C40E5AD68, 0x87D4713D6F33AA6B}, // 1e-329 + {0x680EFDAF511F18C2, 0xA9C98D8CCB009506}, // 1e-328 + {0x0212BD1B2566DEF2, 0xD43BF0EFFDC0BA48}, // 1e-327 + {0x014BB630F7604B57, 0x84A57695FE98746D}, // 1e-326 + {0x419EA3BD35385E2D, 0xA5CED43B7E3E9188}, // 1e-325 + {0x52064CAC828675B9, 0xCF42894A5DCE35EA}, // 1e-324 + {0x7343EFEBD1940993, 0x818995CE7AA0E1B2}, // 1e-323 + {0x1014EBE6C5F90BF8, 0xA1EBFB4219491A1F}, // 1e-322 + {0xD41A26E077774EF6, 0xCA66FA129F9B60A6}, // 1e-321 + {0x8920B098955522B4, 0xFD00B897478238D0}, // 1e-320 + {0x55B46E5F5D5535B0, 0x9E20735E8CB16382}, // 1e-319 + {0xEB2189F734AA831D, 0xC5A890362FDDBC62}, // 1e-318 + {0xA5E9EC7501D523E4, 0xF712B443BBD52B7B}, // 1e-317 + {0x47B233C92125366E, 0x9A6BB0AA55653B2D}, // 1e-316 + {0x999EC0BB696E840A, 0xC1069CD4EABE89F8}, // 1e-315 + {0xC00670EA43CA250D, 0xF148440A256E2C76}, // 1e-314 + {0x380406926A5E5728, 0x96CD2A865764DBCA}, // 1e-313 + {0xC605083704F5ECF2, 0xBC807527ED3E12BC}, // 1e-312 + {0xF7864A44C633682E, 0xEBA09271E88D976B}, // 1e-311 + {0x7AB3EE6AFBE0211D, 0x93445B8731587EA3}, // 1e-310 + {0x5960EA05BAD82964, 0xB8157268FDAE9E4C}, // 1e-309 + {0x6FB92487298E33BD, 0xE61ACF033D1A45DF}, // 1e-308 {0xA5D3B6D479F8E056, 0x8FD0C16206306BAB}, // 1e-307 {0x8F48A4899877186C, 0xB3C4F1BA87BC8696}, // 1e-306 {0x331ACDABFE94DE87, 0xE0B62E2929ABA83C}, // 1e-305 @@ -691,4 +746,63 @@ var detailedPowersOfTen = [596][2]uint64{ {0x49ED8EABCCCC485D, 0x867F59A9D4BED6C0}, // 1e286 {0x5C68F256BFFF5A74, 0xA81F301449EE8C70}, // 1e287 {0x73832EEC6FFF3111, 0xD226FC195C6A2F8C}, // 1e288 + {0xC831FD53C5FF7EAB, 0x83585D8FD9C25DB7}, // 1e289 + {0xBA3E7CA8B77F5E55, 0xA42E74F3D032F525}, // 1e290 + {0x28CE1BD2E55F35EB, 0xCD3A1230C43FB26F}, // 1e291 + {0x7980D163CF5B81B3, 0x80444B5E7AA7CF85}, // 1e292 + {0xD7E105BCC332621F, 0xA0555E361951C366}, // 1e293 + {0x8DD9472BF3FEFAA7, 0xC86AB5C39FA63440}, // 1e294 + {0xB14F98F6F0FEB951, 0xFA856334878FC150}, // 1e295 + {0x6ED1BF9A569F33D3, 0x9C935E00D4B9D8D2}, // 1e296 + {0x0A862F80EC4700C8, 0xC3B8358109E84F07}, // 1e297 + {0xCD27BB612758C0FA, 0xF4A642E14C6262C8}, // 1e298 + {0x8038D51CB897789C, 0x98E7E9CCCFBD7DBD}, // 1e299 + {0xE0470A63E6BD56C3, 0xBF21E44003ACDD2C}, // 1e300 + {0x1858CCFCE06CAC74, 0xEEEA5D5004981478}, // 1e301 + {0x0F37801E0C43EBC8, 0x95527A5202DF0CCB}, // 1e302 + {0xD30560258F54E6BA, 0xBAA718E68396CFFD}, // 1e303 + {0x47C6B82EF32A2069, 0xE950DF20247C83FD}, // 1e304 + {0x4CDC331D57FA5441, 0x91D28B7416CDD27E}, // 1e305 + {0xE0133FE4ADF8E952, 0xB6472E511C81471D}, // 1e306 + {0x58180FDDD97723A6, 0xE3D8F9E563A198E5}, // 1e307 + {0x570F09EAA7EA7648, 0x8E679C2F5E44FF8F}, // 1e308 + {0x2CD2CC6551E513DA, 0xB201833B35D63F73}, // 1e309 + {0xF8077F7EA65E58D1, 0xDE81E40A034BCF4F}, // 1e310 + {0xFB04AFAF27FAF782, 0x8B112E86420F6191}, // 1e311 + {0x79C5DB9AF1F9B563, 0xADD57A27D29339F6}, // 1e312 + {0x18375281AE7822BC, 0xD94AD8B1C7380874}, // 1e313 + {0x8F2293910D0B15B5, 0x87CEC76F1C830548}, // 1e314 + {0xB2EB3875504DDB22, 0xA9C2794AE3A3C69A}, // 1e315 + {0x5FA60692A46151EB, 0xD433179D9C8CB841}, // 1e316 + {0xDBC7C41BA6BCD333, 0x849FEEC281D7F328}, // 1e317 + {0x12B9B522906C0800, 0xA5C7EA73224DEFF3}, // 1e318 + {0xD768226B34870A00, 0xCF39E50FEAE16BEF}, // 1e319 + {0xE6A1158300D46640, 0x81842F29F2CCE375}, // 1e320 + {0x60495AE3C1097FD0, 0xA1E53AF46F801C53}, // 1e321 + {0x385BB19CB14BDFC4, 0xCA5E89B18B602368}, // 1e322 + {0x46729E03DD9ED7B5, 0xFCF62C1DEE382C42}, // 1e323 + {0x6C07A2C26A8346D1, 0x9E19DB92B4E31BA9}, // 1e324 + {0xC7098B7305241885, 0xC5A05277621BE293}, // 1e325 + {0xB8CBEE4FC66D1EA7, 0xF70867153AA2DB38}, // 1e326 + {0x737F74F1DC043328, 0x9A65406D44A5C903}, // 1e327 + {0x505F522E53053FF2, 0xC0FE908895CF3B44}, // 1e328 + {0x647726B9E7C68FEF, 0xF13E34AABB430A15}, // 1e329 + {0x5ECA783430DC19F5, 0x96C6E0EAB509E64D}, // 1e330 + {0xB67D16413D132072, 0xBC789925624C5FE0}, // 1e331 + {0xE41C5BD18C57E88F, 0xEB96BF6EBADF77D8}, // 1e332 + {0x8E91B962F7B6F159, 0x933E37A534CBAAE7}, // 1e333 + {0x723627BBB5A4ADB0, 0xB80DC58E81FE95A1}, // 1e334 + {0xCEC3B1AAA30DD91C, 0xE61136F2227E3B09}, // 1e335 + {0x213A4F0AA5E8A7B1, 0x8FCAC257558EE4E6}, // 1e336 + {0xA988E2CD4F62D19D, 0xB3BD72ED2AF29E1F}, // 1e337 + {0x93EB1B80A33B8605, 0xE0ACCFA875AF45A7}, // 1e338 + {0xBC72F130660533C3, 0x8C6C01C9498D8B88}, // 1e339 + {0xEB8FAD7C7F8680B4, 0xAF87023B9BF0EE6A}, // 1e340 + {0xA67398DB9F6820E1, 0xDB68C2CA82ED2A05}, // 1e341 + {0x88083F8943A1148C, 0x892179BE91D43A43}, // 1e342 + {0x6A0A4F6B948959B0, 0xAB69D82E364948D4}, // 1e343 + {0x848CE34679ABB01C, 0xD6444E39C3DB9B09}, // 1e344 + {0xF2D80E0C0C0B4E11, 0x85EAB0E41A6940E5}, // 1e345 + {0x6F8E118F0F0E2195, 0xA7655D1D2103911F}, // 1e346 + {0x4B7195F2D2D1A9FB, 0xD13EB46469447567}, // 1e347 } |