aboutsummaryrefslogtreecommitdiff
path: root/src/bufio
diff options
context:
space:
mode:
authorAlex Gaynor <alex.gaynor@gmail.com>2020-04-27 23:10:23 +0000
committerIan Lance Taylor <iant@golang.org>2020-04-28 00:53:32 +0000
commitc2e0f01598fbc17d2f960fe93c5bcb057203b75d (patch)
tree544402aa7986b8c1fcf7e0957e663f9932a338fd /src/bufio
parent42c48998aada0df10279650d04a018c83cbfa518 (diff)
downloadgo-c2e0f01598fbc17d2f960fe93c5bcb057203b75d.tar.gz
go-c2e0f01598fbc17d2f960fe93c5bcb057203b75d.zip
bufio: optimize bufio.Reader.ReadString to avoid an allocation and copy
name old time/op new time/op delta ReaderReadString-4 226ns ±12% 161ns ±11% -28.76% (p=0.008 n=5+5) name old alloc/op new alloc/op delta ReaderReadString-4 288B ± 0% 144B ± 0% -50.00% (p=0.008 n=5+5) name old allocs/op new allocs/op delta ReaderReadString-4 2.00 ± 0% 1.00 ± 0% -50.00% (p=0.008 n=5+5) Change-Id: I77f330b8340c2bfbfff1f6f1000170b65953a200 GitHub-Last-Rev: 65d65302a7b80504b4d37b81a3843fe1439e638a GitHub-Pull-Request: golang/go#34706 Reviewed-on: https://go-review.googlesource.com/c/go/+/199257 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/bufio')
-rw-r--r--src/bufio/bufio.go52
-rw-r--r--src/bufio/bufio_test.go32
2 files changed, 66 insertions, 18 deletions
diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go
index f0810be3a4..7cbd5424ea 100644
--- a/src/bufio/bufio.go
+++ b/src/bufio/bufio.go
@@ -11,6 +11,7 @@ import (
"bytes"
"errors"
"io"
+ "strings"
"unicode/utf8"
)
@@ -419,20 +420,16 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) {
return
}
-// ReadBytes reads until the first occurrence of delim in the input,
-// returning a slice containing the data up to and including the delimiter.
-// If ReadBytes encounters an error before finding a delimiter,
-// it returns the data read before the error and the error itself (often io.EOF).
-// ReadBytes returns err != nil if and only if the returned data does not end in
-// delim.
-// For simple uses, a Scanner may be more convenient.
-func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
- // Use ReadSlice to look for array,
- // accumulating full buffers.
+// collectFragments reads until the first occurrence of delim in the input. It
+// returns (slice of full buffers, remaining bytes before delim, total number
+// of bytes in the combined first two elements, error).
+// The complete result is equal to
+// `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a
+// length of `totalLen`. The result is strucured in this way to allow callers
+// to minimize allocations and copies.
+func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) {
var frag []byte
- var full [][]byte
- var err error
- n := 0
+ // Use ReadSlice to look for delim, accumulating full buffers.
for {
var e error
frag, e = b.ReadSlice(delim)
@@ -447,12 +444,23 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
// Make a copy of the buffer.
buf := make([]byte, len(frag))
copy(buf, frag)
- full = append(full, buf)
- n += len(buf)
+ fullBuffers = append(fullBuffers, buf)
+ totalLen += len(buf)
}
- n += len(frag)
+ totalLen += len(frag)
+ return fullBuffers, frag, totalLen, err
+}
+// ReadBytes reads until the first occurrence of delim in the input,
+// returning a slice containing the data up to and including the delimiter.
+// If ReadBytes encounters an error before finding a delimiter,
+// it returns the data read before the error and the error itself (often io.EOF).
+// ReadBytes returns err != nil if and only if the returned data does not end in
+// delim.
+// For simple uses, a Scanner may be more convenient.
+func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
+ full, frag, n, err := b.collectFragments(delim)
// Allocate new buffer to hold the full pieces and the fragment.
buf := make([]byte, n)
n = 0
@@ -472,8 +480,16 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
// delim.
// For simple uses, a Scanner may be more convenient.
func (b *Reader) ReadString(delim byte) (string, error) {
- bytes, err := b.ReadBytes(delim)
- return string(bytes), err
+ full, frag, n, err := b.collectFragments(delim)
+ // Allocate new buffer to hold the full pieces and the fragment.
+ var buf strings.Builder
+ buf.Grow(n)
+ // Copy full pieces and fragment in.
+ for _, fb := range full {
+ buf.Write(fb)
+ }
+ buf.Write(frag)
+ return buf.String(), err
}
// WriteTo implements io.WriterTo.
diff --git a/src/bufio/bufio_test.go b/src/bufio/bufio_test.go
index 4c4522c660..cb68f3ba23 100644
--- a/src/bufio/bufio_test.go
+++ b/src/bufio/bufio_test.go
@@ -535,6 +535,23 @@ func TestReadWriteRune(t *testing.T) {
}
}
+func TestReadStringAllocs(t *testing.T) {
+ r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n")
+ buf := NewReader(r)
+ allocs := testing.AllocsPerRun(100, func() {
+ r.Seek(0, io.SeekStart)
+ buf.Reset(r)
+
+ _, err := buf.ReadString('\n')
+ if err != nil {
+ t.Fatal(err)
+ }
+ })
+ if allocs != 1 {
+ t.Errorf("Unexpected number of allocations, got %f, want 1", allocs)
+ }
+}
+
func TestWriter(t *testing.T) {
var data [8192]byte
@@ -1644,6 +1661,21 @@ func BenchmarkReaderWriteToOptimal(b *testing.B) {
}
}
+func BenchmarkReaderReadString(b *testing.B) {
+ r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n")
+ buf := NewReader(r)
+ b.ReportAllocs()
+ for i := 0; i < b.N; i++ {
+ r.Seek(0, io.SeekStart)
+ buf.Reset(r)
+
+ _, err := buf.ReadString('\n')
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+}
+
func BenchmarkWriterCopyOptimal(b *testing.B) {
// Optimal case is where the underlying writer implements io.ReaderFrom
srcBuf := bytes.NewBuffer(make([]byte, 8192))