aboutsummaryrefslogtreecommitdiff
path: root/warc
diff options
context:
space:
mode:
author ale <ale@incal.net> 2018-08-31 08:29:14 +0100
committer ale <ale@incal.net> 2018-08-31 08:29:14 +0100
commit ee1a3d8e5278a4a4e8435f9129852b95a9c22afb (patch)
tree fd7a42cfff4aed5bd2379feb35f7172287430ba2 /warc
parent b3d419486a87c9193c2fd6c16168f600876e0f73 (diff)
download crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.tar.gz
crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.zip
Improve error checking
Detect write errors (both on the database and to the WARC output) and abort with an error message. Also fix a bunch of harmless lint warnings.
Diffstat (limited to 'warc')
-rw-r--r-- warc/warc.go 38
1 files changed, 24 insertions, 14 deletions
diff --git a/warc/warc.go b/warc/warc.go
index 49ab7a0..6914c1b 100644
--- a/warc/warc.go
+++ b/warc/warc.go
@@ -47,12 +47,17 @@ func (h Header) Get(key string) string {
}
// Encode the header to a Writer.
-func (h Header) Encode(w io.Writer) {
- fmt.Fprintf(w, "%s\r\n", warcVersion)
+func (h Header) Encode(w io.Writer) error {
+ if _, err := fmt.Fprintf(w, "%s\r\n", warcVersion); err != nil {
+ return err
+ }
for hdr, value := range h {
- fmt.Fprintf(w, "%s: %s\r\n", hdr, value)
+ if _, err := fmt.Fprintf(w, "%s: %s\r\n", hdr, value); err != nil {
+ return err
+ }
}
- fmt.Fprintf(w, "\r\n")
+ _, err := fmt.Fprintf(w, "\r\n")
+ return err
}
// NewHeader returns a Header with its own unique ID and the
@@ -80,26 +85,31 @@ type recordWriter struct {
func (rw *recordWriter) Close() error {
// Add the end-of-record marker.
- fmt.Fprintf(rw, "\r\n\r\n")
-
+ _, err := fmt.Fprintf(rw, "\r\n\r\n")
<-rw.lockCh
-
- return nil
+ return err
}
// NewRecord starts a new WARC record with the provided header. The
// caller must call Close on the returned writer before creating the
// next record. Note that this function may block until that condition
-// is satisfied.
-func (w *Writer) NewRecord(hdr Header) io.WriteCloser {
+// is satisfied. If this function returns an error, the state of the
+// Writer is invalid and it should no longer be used.
+func (w *Writer) NewRecord(hdr Header) (io.WriteCloser, error) {
w.lockCh <- true
if w.gzwriter != nil {
- w.gzwriter.Close()
+ w.gzwriter.Close() // nolint
+ }
+ var err error
+ w.gzwriter, err = gzip.NewWriterLevel(w.writer, gzip.BestCompression)
+ if err != nil {
+ return nil, err
}
- w.gzwriter, _ = gzip.NewWriterLevel(w.writer, gzip.BestCompression)
w.gzwriter.Header.Name = hdr.Get("WARC-Record-ID")
- hdr.Encode(w.gzwriter)
- return &recordWriter{Writer: w.gzwriter, lockCh: w.lockCh}
+ if err = hdr.Encode(w.gzwriter); err != nil {
+ return nil, err
+ }
+ return &recordWriter{Writer: w.gzwriter, lockCh: w.lockCh}, nil
}
// Close the WARC writer and flush all buffers. This will also call