diff options
author | ale <ale@incal.net> | 2018-08-31 08:29:14 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-08-31 08:29:14 +0100 |
commit | ee1a3d8e5278a4a4e8435f9129852b95a9c22afb (patch) | |
tree | fd7a42cfff4aed5bd2379feb35f7172287430ba2 /warc | |
parent | b3d419486a87c9193c2fd6c16168f600876e0f73 (diff) | |
download | crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.tar.gz crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.zip |
Improve error checking
Detect write errors (both on the database and to the WARC output) and
abort with an error message.
Also fix a bunch of harmless lint warnings.
Diffstat (limited to 'warc')
-rw-r--r-- | warc/warc.go | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/warc/warc.go b/warc/warc.go
index 49ab7a0..6914c1b 100644
--- a/warc/warc.go
+++ b/warc/warc.go
@@ -47,12 +47,17 @@ func (h Header) Get(key string) string {
 }
 
 // Encode the header to a Writer.
-func (h Header) Encode(w io.Writer) {
-	fmt.Fprintf(w, "%s\r\n", warcVersion)
+func (h Header) Encode(w io.Writer) error {
+	if _, err := fmt.Fprintf(w, "%s\r\n", warcVersion); err != nil {
+		return err
+	}
 	for hdr, value := range h {
-		fmt.Fprintf(w, "%s: %s\r\n", hdr, value)
+		if _, err := fmt.Fprintf(w, "%s: %s\r\n", hdr, value); err != nil {
+			return err
+		}
 	}
-	fmt.Fprintf(w, "\r\n")
+	_, err := fmt.Fprintf(w, "\r\n")
+	return err
 }
 
 // NewHeader returns a Header with its own unique ID and the
@@ -80,26 +85,31 @@ type recordWriter struct {
 
 func (rw *recordWriter) Close() error {
 	// Add the end-of-record marker.
-	fmt.Fprintf(rw, "\r\n\r\n")
-
+	_, err := fmt.Fprintf(rw, "\r\n\r\n")
 	<-rw.lockCh
-
-	return nil
+	return err
 }
 
 // NewRecord starts a new WARC record with the provided header. The
 // caller must call Close on the returned writer before creating the
 // next record. Note that this function may block until that condition
-// is satisfied.
-func (w *Writer) NewRecord(hdr Header) io.WriteCloser {
+// is satisfied. If this function returns an error, the state of the
+// Writer is invalid and it should no longer be used.
+func (w *Writer) NewRecord(hdr Header) (io.WriteCloser, error) {
 	w.lockCh <- true
 	if w.gzwriter != nil {
-		w.gzwriter.Close()
+		w.gzwriter.Close() // nolint
+	}
+	var err error
+	w.gzwriter, err = gzip.NewWriterLevel(w.writer, gzip.BestCompression)
+	if err != nil {
+		return nil, err
 	}
-	w.gzwriter, _ = gzip.NewWriterLevel(w.writer, gzip.BestCompression)
 	w.gzwriter.Header.Name = hdr.Get("WARC-Record-ID")
-	hdr.Encode(w.gzwriter)
-	return &recordWriter{Writer: w.gzwriter, lockCh: w.lockCh}
+	if err = hdr.Encode(w.gzwriter); err != nil {
+		return nil, err
+	}
+	return &recordWriter{Writer: w.gzwriter, lockCh: w.lockCh}, nil
 }
 
 // Close the WARC writer and flush all buffers. This will also call