about summary refs log tree commit diff
path: root/cmd/crawl/crawl.go
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/crawl/crawl.go')
-rw-r--r-- cmd/crawl/crawl.go 8
1 files changed, 7 insertions, 1 deletions
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index d68ac5e..de45494 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -24,6 +24,7 @@ import (
var (
dbPath = flag.String("state", "crawldb", "crawl state database path")
+ keepDb = flag.Bool("keep", false, "keep the state database when done")
concurrency = flag.Int("c", 10, "concurrent workers")
depth = flag.Int("depth", 10, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
@@ -207,9 +208,14 @@ func main() {
saver := NewSaveHandler(w)
- crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
+ crawler, err := crawl.NewCrawler(*dbPath, seeds, scope, crawl.FetcherFunc(fetch), crawl.NewRedirectHandler(saver))
if err != nil {
log.Fatal(err)
}
crawler.Run(*concurrency)
+
+ crawler.Close()
+ if !*keepDb {
+ os.RemoveAll(*dbPath)
+ }
}