about | summary | refs | log | tree | commit | diff
path: root/cmd/crawl/crawl.go
diff options
context:
space:
mode:
author ale <ale@incal.net> 2018-08-31 11:08:50 +0100
committer ale <ale@incal.net> 2018-08-31 11:08:50 +0100
commit 23a80bd68c5c51967eaf4e6a857c5d59fe58daf5 (patch)
tree 55fc0a8222b5b282a693124b6f81f00ed91ab6d4 /cmd/crawl/crawl.go
parent 9825334954ec555a9798e8e9be1ac04093595793 (diff)
download crawl-23a80bd68c5c51967eaf4e6a857c5d59fe58daf5.tar.gz
crawl-23a80bd68c5c51967eaf4e6a857c5d59fe58daf5.zip
Add a simple test for the full WARC crawler
Diffstat (limited to 'cmd/crawl/crawl.go')
-rw-r--r-- cmd/crawl/crawl.go 3
1 files changed, 3 insertions, 0 deletions
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index cf2af5d..bbbd65b 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -62,6 +62,7 @@ func hdr2str(h http.Header) []byte {
type warcSaveHandler struct {
warc *warc.Writer
warcInfoID string
+ numWritten int
}
func (h *warcSaveHandler) writeWARCRecord(typ, uri string, data []byte) error {
@@ -109,6 +110,8 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht
return werr
}
+ h.numWritten++
+
return extractLinks(c, u, depth, resp, nil)
}