diff options
author | Alex Gaynor <alex.gaynor@gmail.com> | 2020-04-27 23:10:23 +0000 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-04-28 00:53:32 +0000 |
commit | c2e0f01598fbc17d2f960fe93c5bcb057203b75d (patch) | |
tree | 544402aa7986b8c1fcf7e0957e663f9932a338fd /src/bufio | |
parent | 42c48998aada0df10279650d04a018c83cbfa518 (diff) | |
download | go-c2e0f01598fbc17d2f960fe93c5bcb057203b75d.tar.gz go-c2e0f01598fbc17d2f960fe93c5bcb057203b75d.zip |
bufio: optimize bufio.Reader.ReadString to avoid an allocation and copy
name old time/op new time/op delta
ReaderReadString-4 226ns ±12% 161ns ±11% -28.76% (p=0.008 n=5+5)
name old alloc/op new alloc/op delta
ReaderReadString-4 288B ± 0% 144B ± 0% -50.00% (p=0.008 n=5+5)
name old allocs/op new allocs/op delta
ReaderReadString-4 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5)
Change-Id: I77f330b8340c2bfbfff1f6f1000170b65953a200
GitHub-Last-Rev: 65d65302a7b80504b4d37b81a3843fe1439e638a
GitHub-Pull-Request: golang/go#34706
Reviewed-on: https://go-review.googlesource.com/c/go/+/199257
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/bufio')
-rw-r--r-- | src/bufio/bufio.go | 52 | ||||
-rw-r--r-- | src/bufio/bufio_test.go | 32 |
2 files changed, 66 insertions, 18 deletions
diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go index f0810be3a4..7cbd5424ea 100644 --- a/src/bufio/bufio.go +++ b/src/bufio/bufio.go @@ -11,6 +11,7 @@ import ( "bytes" "errors" "io" + "strings" "unicode/utf8" ) @@ -419,20 +420,16 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) { return } -// ReadBytes reads until the first occurrence of delim in the input, -// returning a slice containing the data up to and including the delimiter. -// If ReadBytes encounters an error before finding a delimiter, -// it returns the data read before the error and the error itself (often io.EOF). -// ReadBytes returns err != nil if and only if the returned data does not end in -// delim. -// For simple uses, a Scanner may be more convenient. -func (b *Reader) ReadBytes(delim byte) ([]byte, error) { - // Use ReadSlice to look for array, - // accumulating full buffers. +// collectFragments reads until the first occurrence of delim in the input. It +// returns (slice of full buffers, remaining bytes before delim, total number +// of bytes in the combined first two elements, error). +// The complete result is equal to +// `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a +// length of `totalLen`. The result is strucured in this way to allow callers +// to minimize allocations and copies. +func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) { var frag []byte - var full [][]byte - var err error - n := 0 + // Use ReadSlice to look for delim, accumulating full buffers. for { var e error frag, e = b.ReadSlice(delim) @@ -447,12 +444,23 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) { // Make a copy of the buffer. buf := make([]byte, len(frag)) copy(buf, frag) - full = append(full, buf) - n += len(buf) + fullBuffers = append(fullBuffers, buf) + totalLen += len(buf) } - n += len(frag) + totalLen += len(frag) + return fullBuffers, frag, totalLen, err +} +// ReadBytes reads until the first occurrence of delim in the input, +// returning a slice containing the data up to and including the delimiter. +// If ReadBytes encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadBytes returns err != nil if and only if the returned data does not end in +// delim. +// For simple uses, a Scanner may be more convenient. +func (b *Reader) ReadBytes(delim byte) ([]byte, error) { + full, frag, n, err := b.collectFragments(delim) // Allocate new buffer to hold the full pieces and the fragment. buf := make([]byte, n) n = 0 @@ -472,8 +480,16 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) { // delim. // For simple uses, a Scanner may be more convenient. func (b *Reader) ReadString(delim byte) (string, error) { - bytes, err := b.ReadBytes(delim) - return string(bytes), err + full, frag, n, err := b.collectFragments(delim) + // Allocate new buffer to hold the full pieces and the fragment. + var buf strings.Builder + buf.Grow(n) + // Copy full pieces and fragment in. + for _, fb := range full { + buf.Write(fb) + } + buf.Write(frag) + return buf.String(), err } // WriteTo implements io.WriterTo. diff --git a/src/bufio/bufio_test.go b/src/bufio/bufio_test.go index 4c4522c660..cb68f3ba23 100644 --- a/src/bufio/bufio_test.go +++ b/src/bufio/bufio_test.go @@ -535,6 +535,23 @@ func TestReadWriteRune(t *testing.T) { } } +func TestReadStringAllocs(t *testing.T) { + r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n") + buf := NewReader(r) + allocs := testing.AllocsPerRun(100, func() { + r.Seek(0, io.SeekStart) + buf.Reset(r) + + _, err := buf.ReadString('\n') + if err != nil { + t.Fatal(err) + } + }) + if allocs != 1 { + t.Errorf("Unexpected number of allocations, got %f, want 1", allocs) + } +} + func TestWriter(t *testing.T) { var data [8192]byte @@ -1644,6 +1661,21 @@ func BenchmarkReaderWriteToOptimal(b *testing.B) { } } +func BenchmarkReaderReadString(b *testing.B) { + r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n") + buf := NewReader(r) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + r.Seek(0, io.SeekStart) + buf.Reset(r) + + _, err := buf.ReadString('\n') + if err != nil { + b.Fatal(err) + } + } +} + func BenchmarkWriterCopyOptimal(b *testing.B) { // Optimal case is where the underlying writer implements io.ReaderFrom srcBuf := bytes.NewBuffer(make([]byte, 8192)) |