diff options
author | ale <ale@incal.net> | 2018-08-31 11:09:12 +0100 |
---|---|---|
committer | ale <ale@incal.net> | 2018-08-31 11:09:12 +0100 |
commit | 81e16ffb3885a311e4a460880c2a7ec55eba4295 (patch) | |
tree | 8a7fa9bf5cb4e1b73d4a3732308e1f102c384890 | |
parent | 23a80bd68c5c51967eaf4e6a857c5d59fe58daf5 (diff) | |
download | crawl-81e16ffb3885a311e4a460880c2a7ec55eba4295.tar.gz crawl-81e16ffb3885a311e4a460880c2a7ec55eba4295.zip |
Do not drop /index.html at the end of URLs
-rw-r--r-- | crawler.go | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
@@ -158,7 +158,7 @@ func (c *Crawler) Enqueue(link Outlink, depth int) error {
 	}
 
 	// Normalize the URL.
-	urlStr := purell.NormalizeURL(link.URL, purell.FlagsSafe|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveFragment|purell.FlagRemoveDirectoryIndex|purell.FlagSortQuery)
+	urlStr := purell.NormalizeURL(link.URL, purell.FlagsSafe|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveFragment|purell.FlagSortQuery)
 
 	// Protect the read-modify-update below with a mutex.
 	c.enqueueMx.Lock()
```