author    | ale <ale@incal.net> | 2018-08-31 09:57:06 +0100
committer | ale <ale@incal.net> | 2018-08-31 09:57:06 +0100
commit    | 70c12b7a5de3fe635f4f49aa7e249f5d6141d2af (patch)
tree      | bb532cda2f759f77e61508600cfc1f23e37bb7ba /cmd
parent    | 98e2528f410908e50b4be3a2d5f6ed2b5f32bd2c (diff)
Improve error handling, part two
Handler errors are fatal, so that an error writing the WARC output
will cause the crawl to abort.
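
The policy this commit settles on is that a handler's return value decides whether the whole crawl can continue: per-page problems (a failed fetch, an unparsable document) are swallowed and the handler returns nil, while errors that compromise the archive, such as a failed WARC write, are returned and abort the crawl. Below is a minimal, self-contained sketch of that convention; handlePage and its writeRecord callback are hypothetical stand-ins, not the crawl package's actual API (whose Handle method takes the crawler, URL, depth, response, and error, as seen in the diff below).

```go
// Sketch of the error-handling convention described above: handler
// errors abort the crawl, per-page errors do not. handlePage and
// writeRecord are hypothetical stand-ins, not the crawl package API.
package main

import (
	"io"
	"log"
	"net/http"
	"net/http/httptest"
)

// handlePage mirrors the shape of the Handle method in the diff: it
// returns an error only when the archived output is at risk.
func handlePage(u string, resp *http.Response, fetchErr error,
	writeRecord func([]byte) error) error {
	if fetchErr != nil {
		// The fetch failed: nothing to archive, but not fatal.
		log.Printf("skipping %s: %v", u, fetchErr)
		return nil
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		// A truncated or unreadable page is not fatal either.
		log.Printf("bad page at %s: %v", u, err)
		return nil
	}

	if werr := writeRecord(body); werr != nil {
		// Failing to write the output (e.g. the WARC file) is
		// fatal: propagate the error so the crawl aborts.
		return werr
	}
	return nil
}

func main() {
	srv := httptest.NewServer(http.HandlerFunc(
		func(w http.ResponseWriter, r *http.Request) {
			io.WriteString(w, "<html><body>hello</body></html>")
		}))
	defer srv.Close()

	resp, err := http.Get(srv.URL)
	writeRecord := func(b []byte) error {
		log.Printf("archived %d bytes", len(b))
		return nil
	}
	if herr := handlePage(srv.URL, resp, err, writeRecord); herr != nil {
		log.Fatalf("aborting crawl: %v", herr)
	}
}
```

Run against the test server above, handlePage archives the page; only if writeRecord returned an error would main stop the (toy) crawl.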
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/crawl/crawl.go | 12 | +++++++-----
-rw-r--r-- | cmd/links/links.go |  5 | +++--
2 files changed, 10 insertions(+), 7 deletions(-)
```diff
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 3d1120c..587b64a 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -40,7 +40,8 @@ var (
 func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
 	links, err := analysis.GetLinks(resp)
 	if err != nil {
-		return err
+		// This is not a fatal error, just a bad web page.
+		return nil
 	}
 
 	for _, link := range links {
@@ -82,7 +83,7 @@ func (h *warcSaveHandler) writeWARCRecord(typ, uri string, data []byte) error {
 
 func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *http.Response, err error) error {
 	if err != nil {
-		return err
+		return nil
 	}
 
 	// Read the response body (so we can save it to the WARC
@@ -104,9 +105,10 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht
 
 	// Dump the response.
 	statusLine := fmt.Sprintf("HTTP/1.1 %s", resp.Status)
-	respPayload := bytes.Join([][]byte{
-		[]byte(statusLine), hdr2str(resp.Header), data},
-		[]byte{'\r', '\n'})
+	respPayload := bytes.Join(
+		[][]byte{[]byte(statusLine), hdr2str(resp.Header), data},
+		[]byte{'\r', '\n'},
+	)
 	if werr := h.writeWARCRecord("response", resp.Request.URL.String(), respPayload); werr != nil {
 		return werr
 	}
diff --git a/cmd/links/links.go b/cmd/links/links.go
index 9cd741f..5f76a6a 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -22,12 +22,13 @@ var (
 
 func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, err error) error {
 	if err != nil {
-		return err
+		return nil
 	}
 
 	links, err := analysis.GetLinks(resp)
 	if err != nil {
-		return err
+		// Not a fatal error, just a bad web page.
+		return nil
 	}
 
 	for _, link := range links {
```
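
The third hunk in crawl.go only reformats the call that assembles the WARC "response" record: the payload is the HTTP status line, the serialized headers, and the body, joined with CRLF. The standalone sketch below shows the same bytes.Join pattern; headerBytes is a hypothetical stand-in for the package's hdr2str helper, and the assumption that each serialized header line ends in CRLF (so the final join separator yields the blank line before the body) is mine, not taken from the source.

```go
// Standalone illustration of how the response payload in crawl.go is
// assembled: status line, serialized headers, and body joined with
// CRLF. headerBytes is a hypothetical stand-in for hdr2str.
package main

import (
	"bytes"
	"fmt"
	"net/http"
	"sort"
)

// headerBytes serializes headers as "Key: value\r\n" lines. Because
// each line already ends in CRLF, the final bytes.Join separator
// produces the blank line between headers and body (an assumption
// about what hdr2str does in the real code).
func headerBytes(h http.Header) []byte {
	keys := make([]string, 0, len(h))
	for k := range h {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var buf bytes.Buffer
	for _, k := range keys {
		for _, v := range h[k] {
			fmt.Fprintf(&buf, "%s: %s\r\n", k, v)
		}
	}
	return buf.Bytes()
}

func main() {
	hdr := http.Header{"Content-Type": {"text/html"}}
	statusLine := fmt.Sprintf("HTTP/1.1 %s", "200 OK")
	data := []byte("<html><body>hello</body></html>")

	respPayload := bytes.Join(
		[][]byte{[]byte(statusLine), headerBytes(hdr), data},
		[]byte{'\r', '\n'},
	)
	fmt.Printf("%q\n", respPayload)
}
```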