about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jordan <me@jordan.im> 2022-03-24 12:44:02 -0700
committer Jordan <me@jordan.im> 2022-03-24 12:44:02 -0700
commit 5fe5bcb9360f77367164e7c96314961cac15ae9f (patch)
tree 17e823d31fcf7e2b69608d95124c58c62b299bed
parent 8b86f4fd05f522350a1f86ed531b6e8f568c1813 (diff)
download crawl-5fe5bcb9360f77367164e7c96314961cac15ae9f.tar.gz
crawl-5fe5bcb9360f77367164e7c96314961cac15ae9f.zip
crawler: continue crawl when context deadline exceeded (timeout)
-rw-r--r-- crawler.go 9
1 files changed, 6 insertions, 3 deletions
diff --git a/crawler.go b/crawler.go
index b5f0822..195bab6 100644
--- a/crawler.go
+++ b/crawler.go
@@ -269,11 +269,14 @@ func (c *Crawler) urlHandler(queue <-chan queuePair) {
os.Remove(tmpFile.Name())
wb := new(leveldb.Batch)
- switch err {
- case nil:
+ switch {
+ case err == nil:
c.queue.Release(wb, p)
- case ErrRetryRequest:
+ case err == ErrRetryRequest:
Must(c.queue.Retry(wb, p, errorRetryDelay))
+ case os.IsTimeout(err):
+ log.Printf("network error in handling %s: %v", p.URL, err)
+ c.queue.Release(wb, p)
default:
log.Panicf("fatal error in handling %s: %v", p.URL, err)
}