diff options
author | Jordan <me@jordan.im> | 2021-12-03 07:19:28 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2021-12-03 07:19:28 -0700 |
commit | b0835fc63877c63e88e75840c5d52d3167ad5e10 (patch) | |
tree | 9db697711cab98fa9c49f87d40d6f7212baae19c /keep.go | |
download | keep-b0835fc63877c63e88e75840c5d52d3167ad5e10.tar.gz keep-b0835fc63877c63e88e75840c5d52d3167ad5e10.zip |
initial commit
Diffstat (limited to 'keep.go')
-rw-r--r-- | keep.go | 187 |
1 files changed, 187 insertions, 0 deletions
@@ -0,0 +1,187 @@ +package main + +import ( + "database/sql" + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/url" + "os" + "os/signal" + "os/user" + "path" + "strings" + "syscall" + "time" + + "github.com/bwmarrin/discordgo" + "golang.org/x/net/publicsuffix" +) + +type Config struct { + Token string `json:"token"` + Verbose bool `json:"verbose"` +} + +type Message struct { + URL string + Author string + Guild string + Channel string +} + +var ( + messageChan chan *Message + config Config +) + +func main() { + + // Create ~/.keep directory in user's home to store db + user, err := user.Current() + if err != nil { + log.Fatal(err) + } + keepDir := path.Join(user.HomeDir, ".keep") + + // Default config location: ~/.keep/keep.json + var configPath string + flag.StringVar(&configPath, "config", path.Join(keepDir, "keep.json"), + "path to configuration file") + flag.Parse() + conf, err := ioutil.ReadFile(configPath) + if err != nil { + log.Fatal(err) + } + err = json.Unmarshal([]byte(conf), &config) + if err != nil { + log.Fatal(err) + } + + // Create and initialize URL cache database + db := initDB(path.Join(keepDir, "keep.db")) + + // Channel for passing URLs to the archive goroutine for archival + messageChan = make(chan *Message, 25) + go archiver(db) + + // Create a new Discord session using provided credentials + dg, err := discordgo.New(config.Token) + if err != nil { + fmt.Println("error creating Discord session,", err) + return + } + + // Make our client look like Firefox since we're authenticating with + // user/pass credentials (self bot) + dg.UserAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/94.0 " + + // Register the messageCreate func as a callback for MessageCreate events + dg.AddHandler(messageCreate) + + // We only care about receiving message events + dg.Identify.Intents = discordgo.IntentsGuildMessages + + // Open a websocket connection to Discord and begin listening + err = dg.Open() + if err != nil { + fmt.Println("error opening connection,", err) + return + } + + // Wait here until CTRL-C or other term signal is received + sc := make(chan os.Signal, 1) + signal.Notify(sc, syscall.SIGINT, syscall.SIGTERM, os.Interrupt, os.Kill) + <-sc + + // Cleanly close down the Discord session + dg.Close() +} + +// archiver is intended to be run in its own goroutine, receiving URLs from main +// over a shared channel for processing +func archiver(db *sql.DB) { + + // Each iteration removes and processes one url from the channel + for { + + // Blocks until URL is received + message := <-messageChan + + // Skip if we have URL in database and status OK + cached, status_code := isCached(db, message.URL) + if cached && status_code == http.StatusOK { + log.Printf("%d %s", status_code, message.URL) + continue + } + + // Skip if the Internet Archive already has a copy available + archived, status_code := isArchived(message.URL) + if archived && status_code == http.StatusOK { + addArchived(db, message, status_code) + log.Printf("%d %s", status_code, message.URL) + continue + } + + // Archive, URL is not present in cache or IA + status_code = archive(message.URL) + addArchived(db, message, status_code) + log.Printf("%d %s", status_code, message.URL) + + // Limit requests to Wayback API to 5-second intervals + time.Sleep(5 * time.Second) + } +} + +// messageCreate be called (due to AddHandler above) every time a new message is +// created on any channel that the authenticated bot has access to +func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) { + + // https://github.com/bwmarrin/discordgo/issues/961 + if m.Content == "" { + chanMsgs, err := s.ChannelMessages(m.ChannelID, 1, "", "", m.ID) + if err != nil { + log.Printf("Unable to get messages: %s", err) + return + } + m.Content = chanMsgs[0].Content + m.Attachments = chanMsgs[0].Attachments + } + + // Log all messages if verbose set to true + if config.Verbose { + log.Println(m.Content) + } + + // Split message by spaces into individual fields + for _, w := range strings.Fields(m.Content) { + + // Assess whether message part looks like a valid URL + u, err := url.Parse(w) + if err != nil || !u.IsAbs() || strings.IndexByte(u.Host, '.') <= 0 { + continue + } + + // Ensure domain TLD is ICANN-managed + if _, icann := publicsuffix.PublicSuffix(u.Host); !icann { + continue + } + + // Ensure host is not present in blacklisted set + if isBlacklisted(u.Host) { + continue + } + + // Send message attributes/URL over the channel + message := Message{ + URL: w, + Author: m.Author.ID, + Guild: m.GuildID, + Channel: m.ChannelID, + } + messageChan <- &message + } +} |