diff options
author | ale <ale@incal.net> | 2018-08-31 10:36:49 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-08-31 10:36:49 +0100 |
commit | 9825334954ec555a9798e8e9be1ac04093595793 (patch) | |
tree | af898b7f30294c5edc784de591f083ab21a3ebef /cmd/links | |
parent | 70c12b7a5de3fe635f4f49aa7e249f5d6141d2af (diff) | |
download | crawl-9825334954ec555a9798e8e9be1ac04093595793.tar.gz crawl-9825334954ec555a9798e8e9be1ac04093595793.zip |
Explicitly delegate retry logic to handlers
Makes it possible to retry requests for temporary HTTP errors (429,
500, etc.).
Diffstat (limited to 'cmd/links')
-rw-r--r-- | cmd/links/links.go | 14 |
1 file changed, 8 insertions, 6 deletions
```diff
diff --git a/cmd/links/links.go b/cmd/links/links.go
index 5f76a6a..bf91f3f 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -20,11 +20,7 @@ var (
 	validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
 )
 
-func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, err error) error {
-	if err != nil {
-		return nil
-	}
-
+func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
 	links, err := analysis.GetLinks(resp)
 	if err != nil {
 		// Not a fatal error, just a bad web page.
@@ -50,7 +46,13 @@ func main() {
 		crawl.NewSeedScope(seeds),
 	)
 
-	crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(http.Get), crawl.NewRedirectHandler(crawl.HandlerFunc(extractLinks)))
+	crawler, err := crawl.NewCrawler(
+		"crawldb",
+		seeds,
+		scope,
+		crawl.FetcherFunc(http.Get),
+		crawl.HandleRetries(crawl.FollowRedirects(crawl.FilterErrors(crawl.HandlerFunc(extractLinks)))),
+	)
 	if err != nil {
 		log.Fatal(err)
 	}
```