aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2021-12-07 11:07:13 -0700
committerJordan <me@jordan.im>2021-12-07 11:07:13 -0700
commite4e052cf2709835217a748db7674bb6b45d0a8cf (patch)
treee82c5886b04a532c155f4225ab167b40c6b72380
parent1bb7d04b6f090044a31638bd2adbeac004567c4b (diff)
downloadkeep-e4e052cf2709835217a748db7674bb6b45d0a8cf.tar.gz
keep-e4e052cf2709835217a748db7674bb6b45d0a8cf.zip
archive, keep: host -> regexp ignore list
-rw-r--r--archive.go12
-rw-r--r--archive_test.go24
-rw-r--r--keep.go7
-rw-r--r--keep.json12
4 files changed, 42 insertions, 13 deletions
diff --git a/archive.go b/archive.go
index 7f44078..924e6f4 100644
--- a/archive.go
+++ b/archive.go
@@ -4,6 +4,7 @@ import (
"encoding/json"
"log"
"net/http"
+ "regexp"
"strconv"
"time"
)
@@ -14,11 +15,6 @@ var (
TIMEOUT time.Duration = 25
client *http.Client = &http.Client{Timeout: TIMEOUT * time.Second}
-
- ignoreList = []string{"cdn.discordapp.com", "discord.com", "tenor.com",
- "c.tenor.com", "archive.org", "web.archive.org", "youtu.be",
- "youtube.com", "www.youtube.com", "discord.gg", "media.discordapp.net",
- "open.spotify.com", "i.redd.it", "v.redd.it"}
)
type Wayback struct {
@@ -34,11 +30,11 @@ type Closest struct {
Status string `json:"status"`
}
-func isIgnored(host string) bool {
+func isIgnored(regex []string, url string) bool {
- for _, h := range ignoreList {
+ for _, r := range regex {
- if host == h {
+ if v := regexp.MustCompile(r); v.MatchString(url) {
return true
}
}
diff --git a/archive_test.go b/archive_test.go
index 096b628..7e7d4ff 100644
--- a/archive_test.go
+++ b/archive_test.go
@@ -9,7 +9,7 @@ func TestIsArchived(t *testing.T) {
url := "http://example.com/"
archived, status := isArchived(url)
- if archived != true || status != 200 {
+ if !archived || status != 200 {
t.Errorf("Received %t, %d; want %t, %d", archived, status, true, 200)
}
}
@@ -18,11 +18,31 @@ func TestIsNotArchived(t *testing.T) {
url := "http://invalidurl.local/"
archived, _ := isArchived(url)
- if archived == true {
+ if archived {
t.Errorf("Received %t; want %t", archived, false)
}
}
+func TestIsIgnored(t *testing.T) {
+
+ ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`}
+ url := "https://example.com/path"
+ ignored := isIgnored(ignoreRegex, url)
+ if !ignored {
+ t.Errorf("Received %t; want %t", ignored, true)
+ }
+}
+
+func TestIsNotIgnored(t *testing.T) {
+
+ ignoreRegex := []string{`^https?://([^/]*\.)?example\.[^/]+/`}
+ url := "https://google.com/path"
+ ignored := isIgnored(ignoreRegex, url)
+ if ignored {
+ t.Errorf("Received %t; want %t", ignored, false)
+ }
+}
+
func TestArchive200(t *testing.T) {
url := "http://example.com/"
diff --git a/keep.go b/keep.go
index 46361b5..53cfe68 100644
--- a/keep.go
+++ b/keep.go
@@ -22,8 +22,9 @@ import (
)
type Config struct {
- Token string `json:"token"`
- Verbose bool `json:"verbose"`
+ Token string `json:"token"`
+ Verbose bool `json:"verbose"`
+ Ignore []string `json:"ignore"`
}
type Message struct {
@@ -170,7 +171,7 @@ func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) {
}
// Ensure URL does not match any pattern in the configured ignore list
- if isIgnored(u.Host) {
+ if isIgnored(config.Ignore, w) {
continue
}
diff --git a/keep.json b/keep.json
index 4882042..ea66b37 100644
--- a/keep.json
+++ b/keep.json
@@ -1,4 +1,16 @@
{
"token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1",
"verbose":false,
+ "ignore": [
+ "^https?://(www\\.)?reddit\\.com/gallery",
+ "^https?://([^/]*\\.)?discordapp\\.[^/]+/",
+ "^https?://([^/]*\\.)?discord\\.[^/]+/",
+ "^https?://([^/]*\\.)?tenor\\.[^/]+/",
+ "^https?://([^/]*\\.)?archive\\.org/",
+ "^https?://([^/]*\\.)?reddit\\.com/gallery",
+ "^https?://([^/]*\\.)?youtu\\.be/",
+ "^https?://([^/]*\\.)?spotify\\.com/",
+ "^https?://(www\\.)?youtube\\.com/",
+ "^https?://([^/]*\\.)?redd\\.it/"
+ ]
}