From 98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c Mon Sep 17 00:00:00 2001 From: ale Date: Fri, 31 Aug 2018 09:44:04 +0100 Subject: Use a buffered Writer for WARC output --- warc/warc.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/warc/warc.go b/warc/warc.go index 6914c1b..1184d9c 100644 --- a/warc/warc.go +++ b/warc/warc.go @@ -3,6 +3,7 @@ package warc import ( + "bufio" "fmt" "io" "time" @@ -73,9 +74,10 @@ func NewHeader() Header { // Writer can write records to a file in WARC format. It is safe // for concurrent access, since writes are serialized internally. type Writer struct { - writer io.WriteCloser - gzwriter *gzip.Writer - lockCh chan bool + writer io.WriteCloser + bufwriter *bufio.Writer + gzwriter *gzip.Writer + lockCh chan bool } type recordWriter struct { @@ -101,7 +103,7 @@ func (w *Writer) NewRecord(hdr Header) (io.WriteCloser, error) { w.gzwriter.Close() // nolint } var err error - w.gzwriter, err = gzip.NewWriterLevel(w.writer, gzip.BestCompression) + w.gzwriter, err = gzip.NewWriterLevel(w.bufwriter, gzip.BestCompression) if err != nil { return nil, err } @@ -118,13 +120,17 @@ func (w *Writer) Close() error { if err := w.gzwriter.Close(); err != nil { return err } + if err := w.bufwriter.Flush(); err != nil { + return err + } return w.writer.Close() } // NewWriter initializes a new Writer and returns it. func NewWriter(w io.WriteCloser) *Writer { return &Writer{ - writer: w, + writer: w, + bufwriter: bufio.NewWriter(w), // Buffering is important here since we're using this // channel as a semaphore. lockCh: make(chan bool, 1), -- cgit v1.2.3-54-g00ecf