diff options
author | Josh Bleecher Snyder <josharian@gmail.com> | 2017-02-03 22:40:56 -0800 |
---|---|---|
committer | Josh Bleecher Snyder <josharian@gmail.com> | 2017-02-28 19:23:33 +0000 |
commit | 504bc3ed24765294cf3d665a68d57a6e4fc7d23a (patch) | |
tree | 0ae798ff8e6a9ef2fbc1d27232e19a43e829f9ca /src/runtime/iface.go | |
parent | f6fc0dd620a838be3c98acc798ba58d6cbb0bac2 (diff) | |
download | go-504bc3ed24765294cf3d665a68d57a6e4fc7d23a.tar.gz go-504bc3ed24765294cf3d665a68d57a6e4fc7d23a.zip |
cmd/compile, runtime: specialize convT2x, don't alloc for zero vals
Prior to this CL, all runtime conversions
from a concrete value to an interface went
through one of two runtime calls: convT2E or convT2I.
However, in practice, basic types are very common.
Specializing convT2x for those basic types allows
for a more efficient implementation for those types.
For basic scalars and strings, allocation and copying
can use the same methods as normal code.
For pointer-free types, allocation can occur without
zeroing, and copying can take place without GC calls.
For slices, copying is cheaper and simpler.
This CL adds twelve runtime routines:
convT2E16, convT2I16
convT2E32, convT2I32
convT2E64, convT2I64
convT2Estring, convT2Istring
convT2Eslice, convT2Islice
convT2Enoptr, convT2Inoptr
While compiling make.bash, 93% of all convT2x calls
are now to one of these specialized convT2x call.
Within specialized convT2x routines, it is cheap to check
for a zero value, in a way that it is not in general.
When we detect a zero value there, we return a pointer
to zeroVal, rather than allocating.
name old time/op new time/op delta
ConvT2Ezero/zero/16-8 17.9ns ± 2% 3.0ns ± 3% -83.20% (p=0.000 n=56+56)
ConvT2Ezero/zero/32-8 17.8ns ± 2% 3.0ns ± 3% -83.15% (p=0.000 n=59+60)
ConvT2Ezero/zero/64-8 20.1ns ± 1% 3.0ns ± 2% -84.98% (p=0.000 n=57+57)
ConvT2Ezero/zero/str-8 32.6ns ± 1% 3.0ns ± 4% -90.70% (p=0.000 n=59+60)
ConvT2Ezero/zero/slice-8 36.7ns ± 2% 3.0ns ± 2% -91.78% (p=0.000 n=59+59)
ConvT2Ezero/zero/big-8 91.9ns ± 2% 85.9ns ± 2% -6.52% (p=0.000 n=57+57)
ConvT2Ezero/nonzero/16-8 17.7ns ± 2% 12.7ns ± 3% -28.38% (p=0.000 n=55+60)
ConvT2Ezero/nonzero/32-8 17.8ns ± 1% 12.7ns ± 1% -28.44% (p=0.000 n=54+57)
ConvT2Ezero/nonzero/64-8 20.0ns ± 1% 15.0ns ± 1% -24.90% (p=0.000 n=56+58)
ConvT2Ezero/nonzero/str-8 32.6ns ± 1% 25.7ns ± 1% -21.17% (p=0.000 n=58+55)
ConvT2Ezero/nonzero/slice-8 36.8ns ± 2% 30.4ns ± 1% -17.32% (p=0.000 n=60+52)
ConvT2Ezero/nonzero/big-8 92.1ns ± 2% 85.9ns ± 2% -6.70% (p=0.000 n=57+59)
Benchmarks on a real program (the compiler):
name old time/op new time/op delta
Template 227ms ± 5% 221ms ± 2% -2.48% (p=0.000 n=30+26)
Unicode 102ms ± 5% 100ms ± 3% -1.30% (p=0.009 n=30+26)
GoTypes 656ms ± 5% 659ms ± 4% ~ (p=0.208 n=30+30)
Compiler 2.82s ± 2% 2.82s ± 1% ~ (p=0.614 n=29+27)
Flate 128ms ± 2% 128ms ± 5% ~ (p=0.783 n=27+28)
GoParser 158ms ± 3% 158ms ± 3% ~ (p=0.261 n=28+30)
Reflect 408ms ± 7% 401ms ± 3% ~ (p=0.075 n=30+30)
Tar 123ms ± 6% 121ms ± 8% ~ (p=0.287 n=29+30)
XML 220ms ± 2% 220ms ± 4% ~ (p=0.805 n=29+29)
name old user-ns/op new user-ns/op delta
Template 281user-ms ± 4% 279user-ms ± 3% -0.87% (p=0.044 n=28+28)
Unicode 142user-ms ± 4% 141user-ms ± 3% -1.04% (p=0.015 n=30+27)
GoTypes 884user-ms ± 3% 886user-ms ± 2% ~ (p=0.532 n=30+30)
Compiler 3.94user-s ± 3% 3.92user-s ± 1% ~ (p=0.185 n=30+28)
Flate 165user-ms ± 2% 165user-ms ± 4% ~ (p=0.780 n=27+29)
GoParser 209user-ms ± 2% 208user-ms ± 3% ~ (p=0.453 n=28+30)
Reflect 533user-ms ± 6% 526user-ms ± 3% ~ (p=0.057 n=30+30)
Tar 156user-ms ± 6% 154user-ms ± 6% ~ (p=0.133 n=29+30)
XML 288user-ms ± 4% 288user-ms ± 4% ~ (p=0.633 n=30+30)
name old alloc/op new alloc/op delta
Template 41.0MB ± 0% 40.9MB ± 0% -0.11% (p=0.000 n=29+29)
Unicode 32.6MB ± 0% 32.6MB ± 0% ~ (p=0.572 n=29+30)
GoTypes 122MB ± 0% 122MB ± 0% -0.10% (p=0.000 n=30+30)
Compiler 482MB ± 0% 481MB ± 0% -0.07% (p=0.000 n=30+29)
Flate 26.6MB ± 0% 26.6MB ± 0% ~ (p=0.096 n=30+30)
GoParser 32.7MB ± 0% 32.6MB ± 0% -0.06% (p=0.011 n=28+28)
Reflect 84.2MB ± 0% 84.1MB ± 0% -0.17% (p=0.000 n=29+30)
Tar 27.7MB ± 0% 27.7MB ± 0% -0.05% (p=0.032 n=27+28)
XML 44.7MB ± 0% 44.7MB ± 0% ~ (p=0.131 n=28+30)
name old allocs/op new allocs/op delta
Template 373k ± 1% 370k ± 1% -0.76% (p=0.000 n=30+30)
Unicode 325k ± 1% 325k ± 1% ~ (p=0.383 n=29+30)
GoTypes 1.16M ± 0% 1.15M ± 0% -0.75% (p=0.000 n=29+30)
Compiler 4.15M ± 0% 4.13M ± 0% -0.59% (p=0.000 n=30+29)
Flate 238k ± 1% 237k ± 1% -0.62% (p=0.000 n=30+30)
GoParser 304k ± 1% 302k ± 1% -0.64% (p=0.000 n=30+28)
Reflect 1.00M ± 0% 0.99M ± 0% -1.10% (p=0.000 n=29+30)
Tar 245k ± 1% 244k ± 1% -0.59% (p=0.000 n=27+29)
XML 391k ± 1% 389k ± 1% -0.59% (p=0.000 n=29+30)
Change-Id: Id7f456d690567c2b0a96b0d6d64de8784b6e305f
Reviewed-on: https://go-review.googlesource.com/36476
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/runtime/iface.go')
-rw-r--r-- | src/runtime/iface.go | 240 |
1 files changed, 228 insertions, 12 deletions
diff --git a/src/runtime/iface.go b/src/runtime/iface.go index f043724a56..58ed61e3aa 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -205,19 +205,124 @@ func convT2E(t *_type, elem unsafe.Pointer) (e eface) { if msanenabled { msanread(elem, t.size) } - if isDirectIface(t) { - // This case is implemented directly by the compiler. - throw("direct convT2E") - } - x := newobject(t) - // TODO: We allocate a zeroed object only to overwrite it with - // actual data. Figure out how to avoid zeroing. Also below in convT2I. + x := mallocgc(t.size, t, true) + // TODO: We allocate a zeroed object only to overwrite it with actual data. + // Figure out how to avoid zeroing. Also below in convT2Eslice, convT2I, convT2Islice. typedmemmove(t, x, elem) e._type = t e.data = x return } +func convT2E16(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E16)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint16)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(2, t, false) + *(*uint16)(x) = *(*uint16)(elem) + } + e._type = t + e.data = x + return +} + +func convT2E32(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E32)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint32)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(4, t, false) + *(*uint32)(x) = *(*uint32)(elem) + } + e._type = t + e.data = x + return +} + +func convT2E64(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2E64)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint64)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(8, t, false) + *(*uint64)(x) = *(*uint64)(elem) + } + e._type = t + e.data = x + return +} + +func convT2Estring(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Estring)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*string)(elem) == "" { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(t.size, t, true) + *(*string)(x) = *(*string)(elem) + } + e._type = t + e.data = x + return +} + +func convT2Eslice(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Eslice)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if v := *(*slice)(elem); uintptr(v.array) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(t.size, t, true) + *(*slice)(x) = *(*slice)(elem) + } + e._type = t + e.data = x + return +} + +func convT2Enoptr(t *_type, elem unsafe.Pointer) (e eface) { + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&t)), funcPC(convT2Enoptr)) + } + if msanenabled { + msanread(elem, t.size) + } + x := mallocgc(t.size, t, false) + memmove(x, elem, t.size) + e._type = t + e.data = x + return +} + func convT2I(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { @@ -226,17 +331,128 @@ func convT2I(tab *itab, elem unsafe.Pointer) (i iface) { if msanenabled { msanread(elem, t.size) } - if isDirectIface(t) { - // This case is implemented directly by the compiler. - throw("direct convT2I") - } - x := newobject(t) + x := mallocgc(t.size, t, true) typedmemmove(t, x, elem) i.tab = tab i.data = x return } +func convT2I16(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I16)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint16)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(2, t, false) + *(*uint16)(x) = *(*uint16)(elem) + } + i.tab = tab + i.data = x + return +} + +func convT2I32(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I32)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint32)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(4, t, false) + *(*uint32)(x) = *(*uint32)(elem) + } + i.tab = tab + i.data = x + return +} + +func convT2I64(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2I64)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*uint64)(elem) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(8, t, false) + *(*uint64)(x) = *(*uint64)(elem) + } + i.tab = tab + i.data = x + return +} + +func convT2Istring(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Istring)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if *(*string)(elem) == "" { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(t.size, t, true) + *(*string)(x) = *(*string)(elem) + } + i.tab = tab + i.data = x + return +} + +func convT2Islice(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Islice)) + } + if msanenabled { + msanread(elem, t.size) + } + var x unsafe.Pointer + if v := *(*slice)(elem); uintptr(v.array) == 0 { + x = unsafe.Pointer(&zeroVal[0]) + } else { + x = mallocgc(t.size, t, true) + *(*slice)(x) = *(*slice)(elem) + } + i.tab = tab + i.data = x + return +} + +func convT2Inoptr(tab *itab, elem unsafe.Pointer) (i iface) { + t := tab._type + if raceenabled { + raceReadObjectPC(t, elem, getcallerpc(unsafe.Pointer(&tab)), funcPC(convT2Inoptr)) + } + if msanenabled { + msanread(elem, t.size) + } + x := mallocgc(t.size, t, false) + memmove(x, elem, t.size) + i.tab = tab + i.data = x + return +} + func convI2I(inter *interfacetype, i iface) (r iface) { tab := i.tab if tab == nil { |