diff options
author | ale <ale@incal.net> | 2018-08-31 09:44:04 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-08-31 09:44:04 +0100 |
commit | 98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c (patch) | |
tree | 5f3bd8cf744b2aac9dc5098982048d2512f8cc6f /warc | |
parent | ee1a3d8e5278a4a4e8435f9129852b95a9c22afb (diff) | |
download | crawl-98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c.tar.gz crawl-98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c.zip |
Use a buffered Writer for WARC output
Diffstat (limited to 'warc')
-rw-r--r-- | warc/warc.go | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/warc/warc.go b/warc/warc.go index 6914c1b..1184d9c 100644 --- a/warc/warc.go +++ b/warc/warc.go @@ -3,6 +3,7 @@ package warc import ( + "bufio" "fmt" "io" "time" @@ -73,9 +74,10 @@ func NewHeader() Header { // Writer can write records to a file in WARC format. It is safe // for concurrent access, since writes are serialized internally. type Writer struct { - writer io.WriteCloser - gzwriter *gzip.Writer - lockCh chan bool + writer io.WriteCloser + bufwriter *bufio.Writer + gzwriter *gzip.Writer + lockCh chan bool } type recordWriter struct { @@ -101,7 +103,7 @@ func (w *Writer) NewRecord(hdr Header) (io.WriteCloser, error) { w.gzwriter.Close() // nolint } var err error - w.gzwriter, err = gzip.NewWriterLevel(w.writer, gzip.BestCompression) + w.gzwriter, err = gzip.NewWriterLevel(w.bufwriter, gzip.BestCompression) if err != nil { return nil, err } @@ -118,13 +120,17 @@ func (w *Writer) Close() error { if err := w.gzwriter.Close(); err != nil { return err } + if err := w.bufwriter.Flush(); err != nil { + return err + } return w.writer.Close() } // NewWriter initializes a new Writer and returns it. func NewWriter(w io.WriteCloser) *Writer { return &Writer{ - writer: w, + writer: w, + bufwriter: bufio.NewWriter(w), // Buffering is important here since we're using this // channel as a semaphore. lockCh: make(chan bool, 1), |