From 64eb5fb23f64f209e3d813e017097044a111151f Mon Sep 17 00:00:00 2001
From: ale
Date: Sun, 20 Jan 2019 08:15:22 +0000
Subject: Refactor Handlers in terms of a Publisher interface

Introduce an interface to decouple the Enqueue functionality from the
Crawler implementation.
---
 cmd/crawl/crawl.go | 8 ++++----
 cmd/links/links.go | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'cmd')

diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 2ebba98..54bb505 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -82,7 +82,7 @@ func (f *excludesFileFlag) Set(s string) error {
 	return nil
 }
 
-func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func extractLinks(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
 	links, err := analysis.GetLinks(resp)
 	if err != nil {
 		// This is not a fatal error, just a bad web page.
@@ -90,7 +90,7 @@ func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _
 	}
 
 	for _, link := range links {
-		if err := c.Enqueue(link, depth+1); err != nil {
+		if err := p.Enqueue(link, depth+1); err != nil {
 			return err
 		}
 	}
@@ -127,7 +127,7 @@ func (h *warcSaveHandler) writeWARCRecord(typ, uri string, data []byte) error {
 	return w.Close()
 }
 
-func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func (h *warcSaveHandler) Handle(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
 	// Read the response body (so we can save it to the WARC
 	// output) and replace it with a buffer.
 	data, derr := ioutil.ReadAll(resp.Body)
@@ -157,7 +157,7 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht
 
 	h.numWritten++
 
-	return extractLinks(c, u, depth, resp, nil)
+	return extractLinks(p, u, depth, resp, nil)
 }
 
 func newWarcSaveHandler(w *warc.Writer) (crawl.Handler, error) {
diff --git a/cmd/links/links.go b/cmd/links/links.go
index bf91f3f..2263414 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -20,7 +20,7 @@ var (
 	validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
 )
 
-func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func extractLinks(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
 	links, err := analysis.GetLinks(resp)
 	if err != nil {
 		// Not a fatal error, just a bad web page.
@@ -28,7 +28,7 @@ func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _
 	}
 
 	for _, link := range links {
-		if err := c.Enqueue(link, depth+1); err != nil {
+		if err := p.Enqueue(link, depth+1); err != nil {
 			return err
 		}
 	}
-- 
cgit v1.2.3-54-g00ecf