about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jordan <me@jordan.im> 2022-02-10 11:40:46 -0700
committer: Jordan <me@jordan.im> 2022-02-10 11:40:46 -0700
commit: 2191536b9ea9084525f649dff38b88ac40d222f2 (patch)
tree: 276eedc26746021ab942bce08ee803ce21a4b071
parent: ef2c410063d3be2632ad7449cab6f51511face6b (diff)
download: crawl-2191536b9ea9084525f649dff38b88ac40d222f2.tar.gz
download: crawl-2191536b9ea9084525f649dff38b88ac40d222f2.zip
crawl, scope: recurse infinitely by default
-rw-r--r-- cmd/crawl/crawl.go 2
-rw-r--r-- scope.go 2
2 files changed, 2 insertions, 2 deletions
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 4a98b02..90f19d6 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -32,7 +32,7 @@ var (
dbPath = flag.String("state", "crawldb", "crawl state database path")
keepDb = flag.Bool("keep", false, "keep the state database when done")
concurrency = flag.Int("c", 10, "concurrent workers")
- depth = flag.Int("depth", 100, "maximum link depth")
+ depth = flag.Int("depth", -1, "maximum link depth")
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
excludeRelated = flag.Bool("exclude-related", false, "do not include related resources (css, images, etc) if their URL is not in scope")
outputFile = flag.String("output", "crawl.warc.gz", "output WARC file or pattern (patterns must include a \"%s\" literal token)")
diff --git a/scope.go b/scope.go
index bda1035..c53182f 100644
--- a/scope.go
+++ b/scope.go
@@ -18,7 +18,7 @@ type maxDepthScope struct {
}
func (s *maxDepthScope) Check(_ Outlink, depth int) bool {
- return depth < s.maxDepth
+ return depth < s.maxDepth || s.maxDepth == -1
}
// NewDepthScope returns a Scope that will limit crawls to a