about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ale <ale@incal.net> 2018-08-31 11:09:12 +0100
committer: ale <ale@incal.net> 2018-08-31 11:09:12 +0100
commit: 81e16ffb3885a311e4a460880c2a7ec55eba4295 (patch)
tree: 8a7fa9bf5cb4e1b73d4a3732308e1f102c384890
parent: 23a80bd68c5c51967eaf4e6a857c5d59fe58daf5 (diff)
download: crawl-81e16ffb3885a311e4a460880c2a7ec55eba4295.tar.gz
crawl-81e16ffb3885a311e4a460880c2a7ec55eba4295.zip
Do not drop /index.html at the end of URLs
-rw-r--r-- crawler.go 2
1 file changed, 1 insertion, 1 deletion
diff --git a/crawler.go b/crawler.go
index 9776cfc..d5c2e88 100644
--- a/crawler.go
+++ b/crawler.go
@@ -158,7 +158,7 @@ func (c *Crawler) Enqueue(link Outlink, depth int) error {
}
// Normalize the URL.
- urlStr := purell.NormalizeURL(link.URL, purell.FlagsSafe|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveFragment|purell.FlagRemoveDirectoryIndex|purell.FlagSortQuery)
+ urlStr := purell.NormalizeURL(link.URL, purell.FlagsSafe|purell.FlagRemoveDotSegments|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveFragment|purell.FlagSortQuery)
// Protect the read-modify-update below with a mutex.
c.enqueueMx.Lock()