about summary refs log tree commit diff
path: root/cmd
diff options
context:
space:
mode:
author ale <ale@incal.net> 2014-12-20 11:41:24 +0000
committer ale <ale@incal.net> 2014-12-20 11:41:24 +0000
commit 4c82422d2e75b9b8f4d034b1f43fda566416d6af (patch)
tree 91a66ec0d3ebd7739658794bca3e6a2ff4f6b400 /cmd
parent efe98903c17a9103d7830361d6ff6f98bb9e0faa (diff)
download crawl-4c82422d2e75b9b8f4d034b1f43fda566416d6af.tar.gz
crawl-4c82422d2e75b9b8f4d034b1f43fda566416d6af.zip
make Scope checking more modular
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/crawl/crawl.go 6
-rw-r--r-- cmd/links/links.go 6
2 files changed, 10 insertions, 2 deletions
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 1abeca6..8c02089 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -124,7 +124,11 @@ func main() {
}
seeds := crawl.MustParseURLs(flag.Args())
- scope := crawl.NewSeedScope(seeds, *depth, strings.Split(*validSchemes, ","))
+ scope := []crawl.Scope{
+ crawl.NewSchemeScope(strings.Split(*validSchemes, ",")),
+ crawl.NewDepthScope(*depth),
+ crawl.NewSeedScope(seeds),
+ }
w := warc.NewWriter(outf)
defer w.Close()
diff --git a/cmd/links/links.go b/cmd/links/links.go
index 95388ce..e89e22d 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -38,7 +38,11 @@ func main() {
flag.Parse()
seeds := crawl.MustParseURLs(flag.Args())
- scope := crawl.NewSeedScope(seeds, *depth, strings.Split(*validSchemes, ","))
+ scope := []crawl.Scope{
+ crawl.NewSchemeScope(strings.Split(*validSchemes, ",")),
+ crawl.NewDepthScope(*depth),
+ crawl.NewSeedScope(seeds),
+ }
crawler, err := crawl.NewCrawler("crawldb", seeds, scope, crawl.FetcherFunc(http.Get), crawl.HandlerFunc(extractLinks))
if err != nil {