aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ale <ale@incal.net> 2017-12-19 00:14:58 +0000
committer ale <ale@incal.net> 2017-12-19 00:14:58 +0000
commit 665f7ba1d075c36c010123e3e1566a0a04e89c7d (patch)
tree 0a63923857143d05bcd4ab0cb00de8e43bb01461
parent 4cd67e7234943baf31b2e122f8ee3c70c21fb489 (diff)
downloadcrawl-665f7ba1d075c36c010123e3e1566a0a04e89c7d.tar.gz
crawl-665f7ba1d075c36c010123e3e1566a0a04e89c7d.zip
Skip data: URLs
-rw-r--r-- analysis/links.go 4
1 files changed, 4 insertions, 0 deletions
diff --git a/analysis/links.go b/analysis/links.go
index 5d61547..3f5a795 100644
--- a/analysis/links.go
+++ b/analysis/links.go
@@ -71,6 +71,10 @@ func GetLinks(resp *http.Response) ([]crawl.Outlink, error) {
var result []crawl.Outlink
links := make(map[string]crawl.Outlink)
for _, l := range outlinks {
+ // Skip data: URLs altogether.
+ if strings.HasPrefix(l.URL, "data:") {
+ continue
+ }
if linkurl, err := resp.Request.URL.Parse(l.URL); err == nil {
links[linkurl.String()] = crawl.Outlink{
URL: linkurl,