From e4e052cf2709835217a748db7674bb6b45d0a8cf Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 7 Dec 2021 11:07:13 -0700 Subject: archive, keep: host -> regexp ignore list --- archive.go | 12 ++++-------- archive_test.go | 24 ++++++++++++++++++++++-- keep.go | 7 ++++--- keep.json | 12 ++++++++++++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/archive.go b/archive.go index 7f44078..924e6f4 100644 --- a/archive.go +++ b/archive.go @@ -4,6 +4,7 @@ import ( "encoding/json" "log" "net/http" + "regexp" "strconv" "time" ) @@ -14,11 +15,6 @@ var ( TIMEOUT time.Duration = 25 client *http.Client = &http.Client{Timeout: TIMEOUT * time.Second} - - ignoreList = []string{"cdn.discordapp.com", "discord.com", "tenor.com", - "c.tenor.com", "archive.org", "web.archive.org", "youtu.be", - "youtube.com", "www.youtube.com", "discord.gg", "media.discordapp.net", - "open.spotify.com", "i.redd.it", "v.redd.it"} ) type Wayback struct { @@ -34,11 +30,11 @@ type Closest struct { Status string `json:"status"` } -func isIgnored(host string) bool { +func isIgnored(regex []string, url string) bool { - for _, h := range ignoreList { + for _, r := range regex { - if host == h { + if v := regexp.MustCompile(r); v.MatchString(url) { return true } } diff --git a/archive_test.go b/archive_test.go index 096b628..7e7d4ff 100644 --- a/archive_test.go +++ b/archive_test.go @@ -9,7 +9,7 @@ func TestIsArchived(t *testing.T) { url := "http://example.com/" archived, status := isArchived(url) - if archived != true || status != 200 { + if !archived || status != 200 { t.Errorf("Received %t, %d; want %t, %d", archived, status, true, 200) } } @@ -18,11 +18,31 @@ func TestIsNotArchived(t *testing.T) { url := "http://invalidurl.local/" archived, _ := isArchived(url) - if archived == true { + if archived { t.Errorf("Received %t; want %t", archived, false) } } +func TestIsIgnored(t *testing.T) { + + ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`} + url := "https://example.com/path" + ignored := isIgnored(ignoreRegex, url) + if !ignored { + t.Errorf("Received %t; want %t", ignored, true) + } +} + +func TestIsNotIgnored(t *testing.T) { + + ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`} + url := "https://google.com/path" + ignored := isIgnored(ignoreRegex, url) + if ignored { + t.Errorf("Received %t; want %t", ignored, false) + } +} + func TestArchive200(t *testing.T) { url := "http://example.com/" diff --git a/keep.go b/keep.go index 46361b5..53cfe68 100644 --- a/keep.go +++ b/keep.go @@ -22,8 +22,9 @@ import ( ) type Config struct { - Token string `json:"token"` - Verbose bool `json:"verbose"` + Token string `json:"token"` + Verbose bool `json:"verbose"` + Ignore []string `json:"ignore"` } type Message struct { @@ -170,7 +171,7 @@ func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) { } // Ensure host is not present in ignoreList set - if isIgnored(u.Host) { + if isIgnored(config.Ignore, w) { continue } diff --git a/keep.json b/keep.json index 4882042..ea66b37 100644 --- a/keep.json +++ b/keep.json @@ -1,4 +1,16 @@ { "token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1", "verbose":false + "ignore": [ + "^https?://(www\.)?reddit\.com/gallery", + "^https?://([^/]*\.)?discordapp\.[^/]+/", + "^https?://([^/]*\.)?discord\.[^/]+/", + "^https?://([^/]*\.)?tenor\.[^/]+/", + "^https?://([^/]*\.)?archive\.org/", + "^https?://([^/]*\.)?reddit\.com/gallery`", + "^https?://([^/]*\.)?youtu\.be/", + "^https?://([^/]*\.)?spotify\.com/", + "^https?://(www\.)?youtube\.com/", + "^https?://([^/]*\.)?redd\.it/" + ] } -- cgit v1.2.3-54-g00ecf