about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cmd/crawl/crawl.go 8
-rw-r--r-- crawler.go 4
2 files changed, 11 insertions, 1 deletion
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index d68ac5e..de45494 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -24,6 +24,7 @@ import (
var (
dbPath = flag.String("state", "crawldb", "crawl state database path")
+ keepDb = flag.Bool("keep", false, "keep the state database when done")
concurrency = flag.Int("c", 10, "concurrent workers")
depth = flag.Int("depth", 10, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
@@ -207,9 +208,14 @@ func main() {
saver := NewSaveHandler(w)
- crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
+ crawler, err := crawl.NewCrawler(*dbPath, seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
if err != nil {
log.Fatal(err)
}
crawler.Run(*concurrency)
+
+ crawler.Close()
+ if !*keepDb {
+ os.RemoveAll(*dbPath)
+ }
}
diff --git a/crawler.go b/crawler.go
index c337d97..d162330 100644
--- a/crawler.go
+++ b/crawler.go
@@ -319,6 +319,10 @@ func (c *Crawler) Run(concurrency int) {
wg.Wait()
}
+func (c *Crawler) Close() {
+ c.db.Close()
+}
+
type redirectHandler struct {
h Handler
}