From 5fe5bcb9360f77367164e7c96314961cac15ae9f Mon Sep 17 00:00:00 2001
From: Jordan
Date: Thu, 24 Mar 2022 12:44:02 -0700
Subject: crawler: continue crawl when context deadline exceeded (timeout)

---
 crawler.go | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/crawler.go b/crawler.go
index b5f0822..195bab6 100644
--- a/crawler.go
+++ b/crawler.go
@@ -269,11 +269,14 @@ func (c *Crawler) urlHandler(queue <-chan queuePair) {
 		os.Remove(tmpFile.Name())
 
 		wb := new(leveldb.Batch)
-		switch err {
-		case nil:
+		switch {
+		case err == nil:
 			c.queue.Release(wb, p)
-		case ErrRetryRequest:
+		case err == ErrRetryRequest:
 			Must(c.queue.Retry(wb, p, errorRetryDelay))
+		case os.IsTimeout(err):
+			log.Printf("network error in handling %s: %v", p.URL, err)
+			c.queue.Release(wb, p)
 		default:
 			log.Panicf("fatal error in handling %s: %v", p.URL, err)
 		}
-- 
cgit v1.2.3-54-g00ecf