aboutsummaryrefslogtreecommitdiff
path: root/warc
diff options
context:
space:
mode:
authorale <ale@incal.net>2018-08-31 09:44:04 +0100
committerale <ale@incal.net>2018-08-31 09:44:04 +0100
commit98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c (patch)
tree5f3bd8cf744b2aac9dc5098982048d2512f8cc6f /warc
parentee1a3d8e5278a4a4e8435f9129852b95a9c22afb (diff)
downloadcrawl-98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c.tar.gz
crawl-98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c.zip
Use a buffered Writer for WARC output
Diffstat (limited to 'warc')
-rw-r--r--warc/warc.go16
1 files changed, 11 insertions, 5 deletions
diff --git a/warc/warc.go b/warc/warc.go
index 6914c1b..1184d9c 100644
--- a/warc/warc.go
+++ b/warc/warc.go
@@ -3,6 +3,7 @@
package warc
import (
+ "bufio"
"fmt"
"io"
"time"
@@ -73,9 +74,10 @@ func NewHeader() Header {
// Writer can write records to a file in WARC format. It is safe
// for concurrent access, since writes are serialized internally.
type Writer struct {
- writer io.WriteCloser
- gzwriter *gzip.Writer
- lockCh chan bool
+ writer io.WriteCloser
+ bufwriter *bufio.Writer
+ gzwriter *gzip.Writer
+ lockCh chan bool
}
type recordWriter struct {
@@ -101,7 +103,7 @@ func (w *Writer) NewRecord(hdr Header) (io.WriteCloser, error) {
w.gzwriter.Close() // nolint
}
var err error
- w.gzwriter, err = gzip.NewWriterLevel(w.writer, gzip.BestCompression)
+ w.gzwriter, err = gzip.NewWriterLevel(w.bufwriter, gzip.BestCompression)
if err != nil {
return nil, err
}
@@ -118,13 +120,17 @@ func (w *Writer) Close() error {
if err := w.gzwriter.Close(); err != nil {
return err
}
+ if err := w.bufwriter.Flush(); err != nil {
+ return err
+ }
return w.writer.Close()
}
// NewWriter initializes a new Writer and returns it.
func NewWriter(w io.WriteCloser) *Writer {
return &Writer{
- writer: w,
+ writer: w,
+ bufwriter: bufio.NewWriter(w),
// Buffering is important here since we're using this
// channel as a semaphore.
lockCh: make(chan bool, 1),