diff options
author | ale <ale@incal.net> | 2017-12-19 00:14:58 +0000 |
---|---|---|
committer | ale <ale@incal.net> | 2017-12-19 00:14:58 +0000 |
commit | 665f7ba1d075c36c010123e3e1566a0a04e89c7d (patch) | |
tree | 0a63923857143d05bcd4ab0cb00de8e43bb01461 /analysis | |
parent | 4cd67e7234943baf31b2e122f8ee3c70c21fb489 (diff) | |
download | crawl-665f7ba1d075c36c010123e3e1566a0a04e89c7d.tar.gz crawl-665f7ba1d075c36c010123e3e1566a0a04e89c7d.zip |
Skip data: URLs
Diffstat (limited to 'analysis')
-rw-r--r-- | analysis/links.go | 4 |
1 file changed, 4 insertions, 0 deletions
diff --git a/analysis/links.go b/analysis/links.go
index 5d61547..3f5a795 100644
--- a/analysis/links.go
+++ b/analysis/links.go
@@ -71,6 +71,10 @@ func GetLinks(resp *http.Response) ([]crawl.Outlink, error) {
 	var result []crawl.Outlink
 	links := make(map[string]crawl.Outlink)
 	for _, l := range outlinks {
+		// Skip data: URLs altogether.
+		if strings.HasPrefix(l.URL, "data:") {
+			continue
+		}
 		if linkurl, err := resp.Request.URL.Parse(l.URL); err == nil {
 			links[linkurl.String()] = crawl.Outlink{
 				URL: linkurl,