diff options
Diffstat (limited to 'analysis')
-rw-r--r-- | analysis/links.go | 11 |
1 files changed, 6 insertions, 5 deletions
diff --git a/analysis/links.go b/analysis/links.go
index c0663fa..f1b3e99 100644
--- a/analysis/links.go
+++ b/analysis/links.go
@@ -7,6 +7,7 @@ import (
 	"io"
 	"io/ioutil"
 	"net/http"
+	"os"
 	"regexp"
 	"strings"
 
@@ -41,12 +42,12 @@ type rawOutlink struct {
 
 // GetLinks returns all the links found in a document. Currently only
 // parses HTML pages and CSS stylesheets.
-func GetLinks(resp *http.Response) ([]crawl.Outlink, error) {
+func GetLinks(resp *http.Response, body *os.File) ([]crawl.Outlink, error) {
 	// Parse outbound links relative to the request URI, and
 	// return unique results.
 	var result []crawl.Outlink
 	links := make(map[string]crawl.Outlink)
-	for _, l := range extractLinks(resp) {
+	for _, l := range extractLinks(resp, body) {
 		// Skip data: URLs altogether.
 		if strings.HasPrefix(l.URL, "data:") {
 			continue
@@ -64,13 +65,13 @@ func GetLinks(resp *http.Response) ([]crawl.Outlink, error) {
 	return result, nil
 }
 
-func extractLinks(resp *http.Response) []rawOutlink {
+func extractLinks(resp *http.Response, body *os.File) []rawOutlink {
 	ctype := resp.Header.Get("Content-Type")
 	switch {
 	case strings.HasPrefix(ctype, "text/html"):
-		return extractLinksFromHTML(resp.Body, nil)
+		return extractLinksFromHTML(body, nil)
 	case strings.HasPrefix(ctype, "text/css"):
-		return extractLinksFromCSS(resp.Body, nil)
+		return extractLinksFromCSS(body, nil)
 	default:
 		return nil
 	}