diff options
author | Jordan <me@jordan.im> | 2021-12-07 11:07:13 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2021-12-07 11:07:13 -0700 |
commit | e4e052cf2709835217a748db7674bb6b45d0a8cf (patch) | |
tree | e82c5886b04a532c155f4225ab167b40c6b72380 | |
parent | 1bb7d04b6f090044a31638bd2adbeac004567c4b (diff) | |
download | keep-e4e052cf2709835217a748db7674bb6b45d0a8cf.tar.gz keep-e4e052cf2709835217a748db7674bb6b45d0a8cf.zip |
archive, keep: host -> regexp ignore list
-rw-r--r-- | archive.go | 12 | ||||
-rw-r--r-- | archive_test.go | 24 | ||||
-rw-r--r-- | keep.go | 7 | ||||
-rw-r--r-- | keep.json | 12 |
4 files changed, 42 insertions, 13 deletions
@@ -4,6 +4,7 @@ import ( "encoding/json" "log" "net/http" + "regexp" "strconv" "time" ) @@ -14,11 +15,6 @@ var ( TIMEOUT time.Duration = 25 client *http.Client = &http.Client{Timeout: TIMEOUT * time.Second} - - ignoreList = []string{"cdn.discordapp.com", "discord.com", "tenor.com", - "c.tenor.com", "archive.org", "web.archive.org", "youtu.be", - "youtube.com", "www.youtube.com", "discord.gg", "media.discordapp.net", - "open.spotify.com", "i.redd.it", "v.redd.it"} ) type Wayback struct { @@ -34,11 +30,11 @@ type Closest struct { Status string `json:"status"` } -func isIgnored(host string) bool { +func isIgnored(regex []string, url string) bool { - for _, h := range ignoreList { + for _, r := range regex { - if host == h { + if v := regexp.MustCompile(r); v.MatchString(url) { return true } } diff --git a/archive_test.go b/archive_test.go index 096b628..7e7d4ff 100644 --- a/archive_test.go +++ b/archive_test.go @@ -9,7 +9,7 @@ func TestIsArchived(t *testing.T) { url := "http://example.com/" archived, status := isArchived(url) - if archived != true || status != 200 { + if !archived || status != 200 { t.Errorf("Received %t, %d; want %t, %d", archived, status, true, 200) } } @@ -18,11 +18,31 @@ func TestIsNotArchived(t *testing.T) { url := "http://invalidurl.local/" archived, _ := isArchived(url) - if archived == true { + if archived { t.Errorf("Received %t; want %t", archived, false) } } +func TestIsIgnored(t *testing.T) { + + ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`} + url := "https://example.com/path" + ignored := isIgnored(ignoreRegex, url) + if !ignored { + t.Errorf("Received %t; want %t", ignored, true) + } +} + +func TestIsNotIgnored(t *testing.T) { + + ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`} + url := "https://google.com/path" + ignored := isIgnored(ignoreRegex, url) + if ignored { + t.Errorf("Received %t; want %t", ignored, false) + } +} + func TestArchive200(t *testing.T) { url := "http://example.com/" @@ -22,8 +22,9 @@ import ( ) type Config struct { - Token string `json:"token"` - Verbose bool `json:"verbose"` + Token string `json:"token"` + Verbose bool `json:"verbose"` + Ignore []string `json:"ignore"` } type Message struct { @@ -170,7 +171,7 @@ func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) { } // Ensure host is not present in ignoreList set - if isIgnored(u.Host) { + if isIgnored(config.Ignore, w) { continue } @@ -1,4 +1,16 @@ { "token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1", "verbose":false + "ignore": [ + "^https?://(www\.)?reddit\.com/gallery", + "^https?://([^/]*\.)?discordapp\.[^/]+/", + "^https?://([^/]*\.)?discord\.[^/]+/", + "^https?://([^/]*\.)?tenor\.[^/]+/", + "^https?://([^/]*\.)?archive\.org/", + "^https?://([^/]*\.)?reddit\.com/gallery`", + "^https?://([^/]*\.)?youtu\.be/", + "^https?://([^/]*\.)?spotify\.com/", + "^https?://(www\.)?youtube\.com/", + "^https?://([^/]*\.)?redd\.it/" + ] } |