about summary refs log tree commit diff
path: root/cmd
diff options
context:
space:
mode:
author    ale <ale@incal.net> 2019-01-20 08:15:22 +0000
committer ale <ale@incal.net> 2019-01-20 08:15:22 +0000
commit    64eb5fb23f64f209e3d813e017097044a111151f (patch)
tree      03288c4cfee6f8f59d4e2dabbe057292273fe653 /cmd
parent    cce28f44e7ad88900e6c53394a8e496f2955b784 (diff)
download  crawl-64eb5fb23f64f209e3d813e017097044a111151f.tar.gz
          crawl-64eb5fb23f64f209e3d813e017097044a111151f.zip
Refactor Handlers in terms of a Publisher interface
Introduce an interface to decouple the Enqueue functionality from the Crawler implementation.
Diffstat (limited to 'cmd')
-rw-r--r-- cmd/crawl/crawl.go 8
-rw-r--r-- cmd/links/links.go 4
2 files changed, 6 insertions, 6 deletions
diff --git a/cmd/crawl/crawl.go b/cmd/crawl/crawl.go
index 2ebba98..54bb505 100644
--- a/cmd/crawl/crawl.go
+++ b/cmd/crawl/crawl.go
@@ -82,7 +82,7 @@ func (f *excludesFileFlag) Set(s string) error {
return nil
}
-func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func extractLinks(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
links, err := analysis.GetLinks(resp)
if err != nil {
// This is not a fatal error, just a bad web page.
@@ -90,7 +90,7 @@ func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _
}
for _, link := range links {
- if err := c.Enqueue(link, depth+1); err != nil {
+ if err := p.Enqueue(link, depth+1); err != nil {
return err
}
}
@@ -127,7 +127,7 @@ func (h *warcSaveHandler) writeWARCRecord(typ, uri string, data []byte) error {
return w.Close()
}
-func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func (h *warcSaveHandler) Handle(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
// Read the response body (so we can save it to the WARC
// output) and replace it with a buffer.
data, derr := ioutil.ReadAll(resp.Body)
@@ -157,7 +157,7 @@ func (h *warcSaveHandler) Handle(c *crawl.Crawler, u string, depth int, resp *ht
h.numWritten++
- return extractLinks(c, u, depth, resp, nil)
+ return extractLinks(p, u, depth, resp, nil)
}
func newWarcSaveHandler(w *warc.Writer) (crawl.Handler, error) {
diff --git a/cmd/links/links.go b/cmd/links/links.go
index bf91f3f..2263414 100644
--- a/cmd/links/links.go
+++ b/cmd/links/links.go
@@ -20,7 +20,7 @@ var (
validSchemes = flag.String("schemes", "http,https", "comma-separated list of allowed protocols")
)
-func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _ error) error {
+func extractLinks(p crawl.Publisher, u string, depth int, resp *http.Response, _ error) error {
links, err := analysis.GetLinks(resp)
if err != nil {
// Not a fatal error, just a bad web page.
@@ -28,7 +28,7 @@ func extractLinks(c *crawl.Crawler, u string, depth int, resp *http.Response, _
}
for _, link := range links {
- if err := c.Enqueue(link, depth+1); err != nil {
+ if err := p.Enqueue(link, depth+1); err != nil {
return err
}
}