aboutsummaryrefslogtreecommitdiff
path: root/cmd/links
diff options
context:
space:
mode:
author: ale <ale@incal.net>, 2018-08-31 08:29:14 +0100
committer: ale <ale@incal.net>, 2018-08-31 08:29:14 +0100
commit: ee1a3d8e5278a4a4e8435f9129852b95a9c22afb (patch)
tree: fd7a42cfff4aed5bd2379feb35f7172287430ba2 /cmd/links
parent: b3d419486a87c9193c2fd6c16168f600876e0f73 (diff)
download: crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.tar.gz
crawl-ee1a3d8e5278a4a4e8435f9129852b95a9c22afb.zip
Improve error checking
Detect write errors (both on the database and to the WARC output) and abort with an error message. Also fix a bunch of harmless lint warnings.
Diffstat (limited to 'cmd/links')
-rw-r--r-- cmd/links/links.go | 10
1 file changed, 8 insertions, 2 deletions
diff --git a/cmd/links/links.go b/cmd/links/links.go
index e8b0fcb..9cd741f 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -15,20 +15,25 @@ import (
)
var (
- dbPath = flag.String("state", "crawldb", "crawl state database path")
concurrency = flag.Int("c", 10, "concurrent workers")
depth = flag.Int("depth", 10, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
)
func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, err error) error {
+ if err != nil {
+ return err
+ }
+
links, err := analysis.GetLinks(resp)
if err != nil {
return err
}
for _, link := range links {
- c.Enqueue(link, depth+1)
+ if err := c.Enqueue(link, depth+1); err != nil {
+ return err
+ }
}
return nil
@@ -49,4 +54,5 @@ func main() {
log.Fatal(err)
}
crawler.Run(*concurrency)
+ crawler.Close()
}