diff options
author | Jordan <me@jordan.im> | 2022-02-10 20:19:27 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2022-02-10 20:19:27 -0700 |
commit | caadc00d8dfadc0c9e0237fc7377eb632f500926 (patch) | |
tree | 208a8ea6baac1caa3674ecdfcefa656c254212fd /cmd | |
parent | 9ff760bdc4b0d208b64ba33e3af13228f4aca58f (diff) | |
download | crawl-caadc00d8dfadc0c9e0237fc7377eb632f500926.tar.gz crawl-caadc00d8dfadc0c9e0237fc7377eb632f500926.zip |
crawl, readme: max default WARC size 100 MB -> 5 GB
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/crawl/crawl.go | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index ea88412..8c20901 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -37,7 +37,7 @@ var (
 	validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
 	excludeRelated = flag.Bool("exclude-related", false, "do not include related resources (css, images, etc) if their URL is not in scope")
 	resumeDir = flag.String("resume", "", "path to directory of previous crawl to resume")
-	warcFileSizeMB = flag.Int("output-max-size", 100, "maximum output WARC file size (in MB) when using patterns")
+	warcFileSizeMB = flag.Int("output-max-size", 5000, "maximum output WARC file size (in MB)")
 	cpuprofile = flag.String("cpuprofile", "", "create cpu profile")
 	bindIP = flag.String("bind", "", "IP address from which to make outbound connections")
```