aboutsummaryrefslogtreecommitdiff
path: root/keep.go
diff options
context:
space:
mode:
authorJordan <me@jordan.im>2021-12-03 07:19:28 -0700
committerJordan <me@jordan.im>2021-12-03 07:19:28 -0700
commitb0835fc63877c63e88e75840c5d52d3167ad5e10 (patch)
tree9db697711cab98fa9c49f87d40d6f7212baae19c /keep.go
downloadkeep-b0835fc63877c63e88e75840c5d52d3167ad5e10.tar.gz
keep-b0835fc63877c63e88e75840c5d52d3167ad5e10.zip
initial commit
Diffstat (limited to 'keep.go')
-rw-r--r--keep.go187
1 files changed, 187 insertions, 0 deletions
diff --git a/keep.go b/keep.go
new file mode 100644
index 0000000..3ef7248
--- /dev/null
+++ b/keep.go
@@ -0,0 +1,187 @@
+package main
+
+import (
+ "database/sql"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "net/http"
+ "net/url"
+ "os"
+ "os/signal"
+ "os/user"
+ "path"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/bwmarrin/discordgo"
+ "golang.org/x/net/publicsuffix"
+)
+
+// Config holds the settings decoded from the JSON configuration file.
+type Config struct {
+ // Token is handed to discordgo.New to authenticate the session
+ Token string `json:"token"`
+ // Verbose, when true, logs the content of every message received
+ Verbose bool `json:"verbose"`
+}
+
+// Message carries one candidate URL, plus the identifiers of the Discord
+// message it was found in, from the message handler to the archiver.
+type Message struct {
+ // URL is the whitespace-delimited field of the message content, verbatim
+ URL string
+ // Author is the ID of the message's author
+ Author string
+ // Guild is the ID of the guild the message was posted in
+ Guild string
+ // Channel is the ID of the channel the message was posted in
+ Channel string
+}
+
+// Package-level state shared between main, the message handler and the
+// archiver goroutine.
+var (
+ // messageChan is created in main; messageCreate sends on it and archiver
+ // receives from it
+ messageChan chan *Message
+ // config is populated once in main from the JSON configuration file
+ config Config
+)
+
+// main loads the JSON configuration, opens the URL cache database, starts the
+// archiver goroutine and then runs a Discord session until SIGINT or SIGTERM
+// is received. Any startup failure terminates the process with a non-zero
+// exit status.
+func main() {
+
+	// The database (and, by default, the config file) live in ~/.keep
+	usr, err := user.Current()
+	if err != nil {
+		log.Fatal(err)
+	}
+	keepDir := path.Join(usr.HomeDir, ".keep")
+
+	// Default config location: ~/.keep/keep.json
+	var configPath string
+	flag.StringVar(&configPath, "config", path.Join(keepDir, "keep.json"),
+		"path to configuration file")
+	flag.Parse()
+	conf, err := ioutil.ReadFile(configPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+	if err := json.Unmarshal(conf, &config); err != nil {
+		log.Fatal(err)
+	}
+
+	// Create and initialize URL cache database
+	db := initDB(path.Join(keepDir, "keep.db"))
+
+	// Channel for passing URLs to the archive goroutine for archival; the
+	// buffer lets the message handler run ahead of the rate-limited archiver
+	messageChan = make(chan *Message, 25)
+	go archiver(db)
+
+	// Create a new Discord session using the configured token
+	dg, err := discordgo.New(config.Token)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "error creating Discord session,", err)
+		os.Exit(1)
+	}
+
+	// Make our client look like Firefox since we're authenticating as a
+	// self bot
+	dg.UserAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0 "
+
+	// Register the messageCreate func as a callback for MessageCreate events
+	dg.AddHandler(messageCreate)
+
+	// We only care about receiving message events
+	dg.Identify.Intents = discordgo.IntentsGuildMessages
+
+	// Open a websocket connection to Discord and begin listening
+	if err := dg.Open(); err != nil {
+		fmt.Fprintln(os.Stderr, "error opening connection,", err)
+		os.Exit(1)
+	}
+
+	// Wait here until CTRL-C or other term signal is received. SIGKILL is not
+	// registered because it can never be delivered to the process, and
+	// os.Interrupt is the same signal as syscall.SIGINT.
+	sc := make(chan os.Signal, 1)
+	signal.Notify(sc, os.Interrupt, syscall.SIGTERM)
+	<-sc
+
+	// Cleanly close down the Discord session
+	if err := dg.Close(); err != nil {
+		log.Printf("error closing Discord session: %s", err)
+	}
+}
+
+// archiver is intended to be run in its own goroutine, receiving messages
+// from the handler over messageChan for processing. For each URL it consults
+// the local cache first, then the Internet Archive, and only submits the URL
+// for archival when neither reports a copy with status 200. It returns if
+// messageChan is ever closed.
+func archiver(db *sql.DB) {
+
+	// Each iteration blocks until a message is received, then processes it
+	for message := range messageChan {
+
+		// Skip if we have URL in database and status OK
+		cached, statusCode := isCached(db, message.URL)
+		if cached && statusCode == http.StatusOK {
+			log.Printf("%d %s", statusCode, message.URL)
+			continue
+		}
+
+		// Skip if the Internet Archive already has a copy available; record
+		// that fact so the next sighting is served from the cache
+		archived, statusCode := isArchived(message.URL)
+		if archived && statusCode == http.StatusOK {
+			addArchived(db, message, statusCode)
+			log.Printf("%d %s", statusCode, message.URL)
+			continue
+		}
+
+		// Archive, URL is not present in cache or IA
+		statusCode = archive(message.URL)
+		addArchived(db, message, statusCode)
+		log.Printf("%d %s", statusCode, message.URL)
+
+		// Limit requests to Wayback API to 5-second intervals; only the
+		// submission path above sleeps, the skip paths continue immediately
+		time.Sleep(5 * time.Second)
+	}
+}
+
+// messageCreate be called (due to AddHandler above) every time a new message is
+// created on any channel that the authenticated bot has access to
+func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) {
+
+ // https://github.com/bwmarrin/discordgo/issues/961
+ if m.Content == "" {
+ chanMsgs, err := s.ChannelMessages(m.ChannelID, 1, "", "", m.ID)
+ if err != nil {
+ log.Printf("Unable to get messages: %s", err)
+ return
+ }
+ m.Content = chanMsgs[0].Content
+ m.Attachments = chanMsgs[0].Attachments
+ }
+
+ // Log all messages if verbose set to true
+ if config.Verbose {
+ log.Println(m.Content)
+ }
+
+ // Split message by spaces into individual fields
+ for _, w := range strings.Fields(m.Content) {
+
+ // Assess whether message part looks like a valid URL
+ u, err := url.Parse(w)
+ if err != nil || !u.IsAbs() || strings.IndexByte(u.Host, '.') <= 0 {
+ continue
+ }
+
+ // Ensure domain TLD is ICANN-managed
+ if _, icann := publicsuffix.PublicSuffix(u.Host); !icann {
+ continue
+ }
+
+ // Ensure host is not present in blacklisted set
+ if isBlacklisted(u.Host) {
+ continue
+ }
+
+ // Send message attributes/URL over the channel
+ message := Message{
+ URL: w,
+ Author: m.Author.ID,
+ Guild: m.GuildID,
+ Channel: m.ChannelID,
+ }
+ messageChan <- &message
+ }
+}