diff options
author | ale <ale@incal.net> | 2018-08-31 11:08:50 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-08-31 11:08:50 +0100 |
commit | 23a80bd68c5c51967eaf4e6a857c5d59fe58daf5 (patch) | |
tree | 55fc0a8222b5b282a693124b6f81f00ed91ab6d4 /cmd/crawl/crawl.go | |
parent | 9825334954ec555a9798e8e9be1ac04093595793 (diff) | |
download | crawl-23a80bd68c5c51967eaf4e6a857c5d59fe58daf5.tar.gz crawl-23a80bd68c5c51967eaf4e6a857c5d59fe58daf5.zip |
Add a simple test for the full WARC crawler
Diffstat (limited to 'cmd/crawl/crawl.go')
-rw-r--r-- | cmd/crawl/crawl.go | 3 |
1 files changed, 3 insertions, 0 deletions
```diff
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index cf2af5d..bbbd65b 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -62,6 +62,7 @@ func hdr2str(h http.Header) []byte {
 type warcSaveHandler struct {
 	warc       *warc.Writer
 	warcInfoID string
+	numWritten int
 }
 
 func (h *warcSaveHandler) writeWARCRecord(typ, uri string, data []byte) error {
@@ -109,6 +110,8 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht
 		return werr
 	}
 
+	h.numWritten++
+
 	return extractLinks(c, u, depth, resp, nil)
 }
 
```