diff options
author | Jordan <me@jordan.im> | 2021-12-03 07:19:28 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2021-12-03 07:19:28 -0700 |
commit | b0835fc63877c63e88e75840c5d52d3167ad5e10 (patch) | |
tree | 9db697711cab98fa9c49f87d40d6f7212baae19c | |
download | keep-b0835fc63877c63e88e75840c5d52d3167ad5e10.tar.gz keep-b0835fc63877c63e88e75840c5d52d3167ad5e10.zip |
initial commit
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 29 | ||||
-rw-r--r-- | README.md | 21 | ||||
-rw-r--r-- | archive.go | 73 | ||||
-rw-r--r-- | archive_test.go | 24 | ||||
-rw-r--r-- | db.go | 78 | ||||
-rw-r--r-- | db_test.go | 48 | ||||
-rw-r--r-- | go.mod | 9 | ||||
-rw-r--r-- | go.sum | 15 | ||||
-rw-r--r-- | keep.go | 187 | ||||
-rw-r--r-- | keep.json | 4 |
11 files changed, 491 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1abb790 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.swp +*.swo +keep diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ddff8eb --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +.POSIX: +.SUFFIXES: + +GO = go +RM = rm +GOFLAGS = +PREFIX = /usr/local +BINDIR = $(PREFIX)/bin +CONFIGDIR = $(HOME)/.keep + +goflags = $(GOFLAGS) + +all: keep + +keep: + $(GO) build $(goflags) -ldflags "-X main.buildPrefix=$(PREFIX)" + +clean: + $(RM) -f keep + +test: + $(GO) test -v ./... + +install: all + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(CONFIGDIR) + cp -f keep $(DESTDIR)$(BINDIR) + cp -n keep.json $(DESTDIR)$(CONFIGDIR) + diff --git a/README.md b/README.md new file mode 100644 index 0000000..f5046de --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# Keep + +Keep is a minimal Discord bot that archives to the Wayback Machine any URLs +found in messages visible to the configured account. + +A local cache of saved URLs is kept to prevent duplicate availability API +requests. + +## Installation + +Keep can be compiled with `make` or `go build`, and installed system-wide by +running `make install` with root-level permissions. Tests can be run with `make +test`. 
+ +## Usage + +``` +Usage of ./keep: + -config string + path to configuration file (default "/home/jordan/.keep/keep.json") +``` diff --git a/archive.go b/archive.go new file mode 100644 index 0000000..c88cff7 --- /dev/null +++ b/archive.go @@ -0,0 +1,73 @@ +package main + +import ( + "encoding/json" + "log" + "net/http" + "strconv" + "time" +) + +var ( + API_AVAILABILITY string = "http://archive.org/wayback/available?url=" + API_SAVE string = "https://web.archive.org/save/" + + TIMEOUT time.Duration = 25 + client *http.Client = &http.Client{Timeout: TIMEOUT * time.Second} + + blacklist = []string{"cdn.discordapp.com", "discord.com", "tenor.com", + "c.tenor.com", "archive.org", "web.archive.org", "youtu.be", + "youtube.com", "www.youtube.com"} +) + +type Wayback struct { + Snapshots Snapshot `json:"archived_snapshots,omitempty"` +} + +type Snapshot struct { + Recent Closest `json:"closest"` +} + +type Closest struct { + Available bool `json:"available"` + Status string `json:"status"` +} + +func isBlacklisted(host string) bool { + + for _, h := range blacklist { + + if host == h { + return true + } + } + return false +} + +func isArchived(url string) (bool, int) { + + req, err := http.NewRequest("GET", API_AVAILABILITY+url, nil) + resp, err := client.Do(req) + if err != nil { + log.Println(err) + return false, 0 + } + av := &Wayback{} + decoder := json.NewDecoder(resp.Body) + if err := decoder.Decode(av); err != nil { + log.Println(err) + return false, 0 + } + status, _ := strconv.Atoi(av.Snapshots.Recent.Status) + return av.Snapshots.Recent.Available, status +} + +func archive(url string) int { + + req, err := http.NewRequest("GET", API_SAVE+url, nil) + resp, err := client.Do(req) + if err != nil { + return 0 + } + return resp.StatusCode +} diff --git a/archive_test.go b/archive_test.go new file mode 100644 index 0000000..7c4549e --- /dev/null +++ b/archive_test.go @@ -0,0 +1,24 @@ +package main + +import ( + "net/http" + "testing" +) + +func TestIsArchived(t 
*testing.T) { + + url := "http://example.com/" + archived, status := isArchived(url) + if archived != true || status != 200 { + t.Errorf("Received %t, %d: want %t, %d", archived, status, true, 200) + } +} + +func TestArchive200(t *testing.T) { + + url := "http://example.com/" + status := archive(url) + if status != http.StatusOK { + t.Errorf("Recieved %d; want %d", status, http.StatusOK) + } +} @@ -0,0 +1,78 @@ +package main + +import ( + "database/sql" + "errors" + "log" + "os" + + _ "github.com/mattn/go-sqlite3" +) + +func initDB(path string) *sql.DB { + + if _, err := os.Stat(path); errors.Is(err, os.ErrNotExist) { + log.Printf("Creating %s...\n", path) + file, err := os.Create(path) + if err != nil { + log.Fatal(err) + } + file.Close() + + db, _ := sql.Open("sqlite3", path) + initTables(db) + return db + } else { + db, err := sql.Open("sqlite3", path) + if err != nil { + log.Fatal(err) + } + return db + } +} + +func initTables(db *sql.DB) { + + q := `CREATE TABLE IF NOT EXISTS urls ( + id integer NOT NULL PRIMARY KEY AUTOINCREMENT, + url VARCHAR(500) NOT NULL, + author_id VARCHAR(18), + guild_id VARCHAR(18), + channel_id VARCHAR(18), + status_code INTEGER + ); + CREATE UNIQUE INDEX idx_urls_url ON urls(url);` + s, err := db.Prepare(q) + if err != nil { + log.Fatal(err) + } + s.Exec() +} + +func addArchived(db *sql.DB, m *Message, status_code int) { + + q := `INSERT OR IGNORE INTO urls(url, author_id, guild_id, channel_id, status_code) VALUES (?, ?, ?, ?, ?)` + s, err := db.Prepare(q) + if err != nil { + log.Fatal(err) + } + defer s.Close() + _, err = s.Exec(m.URL, m.Author, m.Guild, m.Channel, status_code) + if err != nil { + log.Fatal(err) + } +} + +func isCached(db *sql.DB, url string) (bool, int) { + + var status_code int + err := db.QueryRow("SELECT status_code FROM urls WHERE url = ?", + url).Scan(&status_code) + switch { + case err == sql.ErrNoRows: + return false, status_code + case err != nil: + log.Fatal(err) + } + return true, status_code +} diff 
--git a/db_test.go b/db_test.go new file mode 100644 index 0000000..b429aaf --- /dev/null +++ b/db_test.go @@ -0,0 +1,48 @@ +package main + +import ( + "database/sql" + "io/ioutil" + "net/http" + "os" + "testing" +) + +var ( + db *sql.DB + db_path string +) + +func TestInitDB(t *testing.T) { + + tmpDB, _ := ioutil.TempFile("", "tmp-*.db") + db_path = tmpDB.Name() + os.Remove(db_path) + db = initDB(db_path) +} + +func TestAddArchived(t *testing.T) { + + m := Message{ + URL: "http://example.com/", + Author: "000000000000000000", + Guild: "000000000000000000", + Channel: "000000000000000000", + } + addArchived(db, &m, 200) +} + +func TestIsCached(t *testing.T) { + + url := "http://example.com/" + cached, status_code := isCached(db, url) + if status_code != http.StatusOK || cached != true { + t.Errorf("Received %t, %d; wanted %t, %d", cached, status_code, true, + http.StatusOK) + } +} + +func TestDBCleanup(t *testing.T) { + + os.Remove(db_path) +} @@ -0,0 +1,9 @@ +module keep + +go 1.16 + +require ( + github.com/bwmarrin/discordgo v0.23.2 // indirect + github.com/mattn/go-sqlite3 v1.14.9 // indirect + golang.org/x/net v0.0.0-20211201190559-0a0e4e1bb54c // indirect +) @@ -0,0 +1,15 @@ +github.com/bwmarrin/discordgo v0.23.2 h1:BzrtTktixGHIu9Tt7dEE6diysEF9HWnXeHuoJEt2fH4= +github.com/bwmarrin/discordgo v0.23.2/go.mod h1:c1WtWUGN6nREDmzIpyTp/iD3VYt4Fpx+bVyfBG7JE+M= +github.com/gorilla/websocket v1.4.0 h1:WDFjx/TMzVgy9VdMMQi2K2Emtwi2QcUQsztZ/zLaH/Q= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/mattn/go-sqlite3 v1.14.9 h1:10HX2Td0ocZpYEjhilsuo6WWtUqttj2Kb0KtD86/KYA= +github.com/mattn/go-sqlite3 v1.14.9/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 h1:y6ce7gCWtnH+m3dCjzQ1PCuwl28DDIc3VNnvY29DlIA= +golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/net 
v0.0.0-20211201190559-0a0e4e1bb54c h1:WtYZ93XtWSO5KlOMgPZu7hXY9WhMZpprvlm5VwvAl8c= +golang.org/x/net v0.0.0-20211201190559-0a0e4e1bb54c/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -0,0 +1,187 @@ +package main + +import ( + "database/sql" + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" + "os/signal" + "os/user" + "path" + "strings" + "syscall" + "time" + + "github.com/bwmarrin/discordgo" + "golang.org/x/net/publicsuffix" +) + +type Config struct { + Token string `json:"token"` + Verbose bool `json:"verbose"` +} + +type Message struct { + URL string + Author string + Guild string + Channel string +} + +var ( + messageChan chan *Message + config Config +) + +func main() { + + // Create ~/.keep directory in user's home to store db + user, err := user.Current() + if err != nil { + log.Fatal(err) + } + keepDir := path.Join(user.HomeDir, ".keep") + + // Default config location: ~/.keep/keep.json + var configPath string + flag.StringVar(&configPath, "config", path.Join(keepDir, "keep.json"), + "path to configuration file") + flag.Parse() + conf, err := ioutil.ReadFile(configPath) + if err != nil { + log.Fatal(err) + } + err = json.Unmarshal([]byte(conf), &config) + if err != nil { + log.Fatal(err) + } + + // Create and initialize URL cache database + db := initDB(path.Join(keepDir, "keep.db")) + + // Channel for passing URLs to the archive goroutine for archival + messageChan = make(chan *Message, 25) + go 
archiver(db) + + // Create a new Discord session using provided credentials + dg, err := discordgo.New(config.Token) + if err != nil { + fmt.Println("error creating Discord session,", err) + return + } + + // Make our client look like Firefox since we're authenticating with + // user/pass credentials (self bot) + dg.UserAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0 " + + // Register the messageCreate func as a callback for MessageCreate events + dg.AddHandler(messageCreate) + + // We only care about receiving message events + dg.Identify.Intents = discordgo.IntentsGuildMessages + + // Open a websocket connection to Discord and begin listening + err = dg.Open() + if err != nil { + fmt.Println("error opening connection,", err) + return + } + + // Wait here until CTRL-C or other term signal is received + sc := make(chan os.Signal, 1) + signal.Notify(sc, syscall.SIGINT, syscall.SIGTERM, os.Interrupt, os.Kill) + <-sc + + // Cleanly close down the Discord session + dg.Close() +} + +// archiver is intended to be run in its own goroutine, receiving URLs from main +// over a shared channel for processing +func archiver(db *sql.DB) { + + // Each iteration removes and processes one url from the channel + for { + + // Blocks until URL is received + message := <-messageChan + + // Skip if we have URL in database and status OK + cached, status_code := isCached(db, message.URL) + if cached && status_code == http.StatusOK { + log.Printf("%d %s", status_code, message.URL) + continue + } + + // Skip if the Internet Archive already has a copy available + archived, status_code := isArchived(message.URL) + if archived && status_code == http.StatusOK { + addArchived(db, message, status_code) + log.Printf("%d %s", status_code, message.URL) + continue + } + + // Archive, URL is not present in cache or IA + status_code = archive(message.URL) + addArchived(db, message, status_code) + log.Printf("%d %s", status_code, message.URL) + + // Limit requests 
to Wayback API to 5-second intervals + time.Sleep(5 * time.Second) + } +} + +// messageCreate is called (due to AddHandler above) every time a new message is +// created on any channel that the authenticated bot has access to +func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) { + + // https://github.com/bwmarrin/discordgo/issues/961 + if m.Content == "" { + chanMsgs, err := s.ChannelMessages(m.ChannelID, 1, "", "", m.ID) + if err != nil { + log.Printf("Unable to get messages: %s", err) + return + } + m.Content = chanMsgs[0].Content + m.Attachments = chanMsgs[0].Attachments + } + + // Log all messages if verbose is set to true + if config.Verbose { + log.Println(m.Content) + } + + // Split message by spaces into individual fields + for _, w := range strings.Fields(m.Content) { + + // Assess whether message part looks like a valid URL + u, err := url.Parse(w) + if err != nil || !u.IsAbs() || strings.IndexByte(u.Host, '.') <= 0 { + continue + } + + // Ensure domain TLD is ICANN-managed + if _, icann := publicsuffix.PublicSuffix(u.Host); !icann { + continue + } + + // Ensure host is not present in blacklisted set + if isBlacklisted(u.Host) { + continue + } + + // Send message attributes/URL over the channel + message := Message{ + URL: w, + Author: m.Author.ID, + Guild: m.GuildID, + Channel: m.ChannelID, + } + messageChan <- &message + } +} diff --git a/keep.json b/keep.json new file mode 100644 index 0000000..4882042 --- /dev/null +++ b/keep.json @@ -0,0 +1,4 @@ +{ + "token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1", + "verbose":false +} |