From ca276b66837ac04bf92de257e5e65d2992f1a547 Mon Sep 17 00:00:00 2001 From: Jordan Date: Thu, 24 Nov 2022 00:55:37 -0700 Subject: archive, keep: send authenticated POST requests to /save/ IA endpoint --- keep.go | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'keep.go') diff --git a/keep.go b/keep.go index 5751bad..4432217 100644 --- a/keep.go +++ b/keep.go @@ -17,17 +17,19 @@ import ( "syscall" "time" + "github.com/PuerkitoBio/purell" "github.com/bwmarrin/discordgo" "golang.org/x/net/publicsuffix" - "github.com/PuerkitoBio/purell" ) type Config struct { - Token string `json:"token"` - Verbose bool `json:"verbose"` - Ignore []string `json:"ignore"` - Host string `json:"host"` - Port string `json:"port"` + AccessKey string `json:"access-key"` + SecretKey string `json:"secret-key"` + Token string `json:"token"` + Verbose bool `json:"verbose"` + Ignore []string `json:"ignore"` + Host string `json:"host"` + Port string `json:"port"` } type Message struct { @@ -75,7 +77,7 @@ func main() { // Channel for passing URLs to the archive goroutine for archival messageChan = make(chan *Message, 25) - go archiver(db) + go archiver(config.AccessKey, config.SecretKey, db) // Start HTTP server http.HandleFunc("/", db.IndexHandler) @@ -118,7 +120,7 @@ func main() { // archiver is intended to be run in its own goroutine, receiving URLs from main // over a shared channel for processing -func archiver(db *SqliteDB) { +func archiver(accessKey string, secretKey string, db *SqliteDB) { // Each iteration removes and processes one url from the channel for { @@ -127,24 +129,24 @@ func archiver(db *SqliteDB) { message := <-messageChan // Skip if we've already seen URL (cached) - cached, status_code := db.IsCached(message.URL) + cached, statusCode := db.IsCached(message.URL) if cached { - log.Println("SEEN", status_code, message.URL) + log.Println("SEEN", statusCode, message.URL) continue } // Skip if the Internet Archive already has a copy available - archived, status_code := isArchived(message.URL) - if archived && status_code == http.StatusOK { - db.AddArchived(message, status_code) - log.Println("SKIP", status_code, message.URL) + archived, statusCode := isArchived(message.URL) + if archived && statusCode == http.StatusOK { + db.AddArchived(message, statusCode) + log.Println("SKIP", statusCode, message.URL) continue } // Archive, URL is not present in cache or IA - status_code = archive(message.URL) - db.AddArchived(message, status_code) - log.Println("SAVE", status_code, message.URL) + statusCode, jobID := archive(accessKey, secretKey, message.URL) + db.AddArchived(message, statusCode) + log.Println("SAVE", statusCode, message.URL, jobID) // Limit requests to Wayback API to 15-second intervals time.Sleep(15 * time.Second) @@ -190,10 +192,10 @@ func messageCreate(s *discordgo.Session, m *discordgo.MessageCreate) { // Normalize URL (RFC 3986) uStr := purell.NormalizeURL(u, purell.FlagsSafe| - purell.FlagRemoveDotSegments| - purell.FlagRemoveDuplicateSlashes| - purell.FlagRemoveFragment| - purell.FlagSortQuery) + purell.FlagRemoveDotSegments| + purell.FlagRemoveDuplicateSlashes| + purell.FlagRemoveFragment| + purell.FlagSortQuery) // Ensure host is not present in ignoreList set if isIgnored(config.Ignore, uStr) { -- cgit v1.2.3-54-g00ecf