From 245076d017f26e12c162c08933fa3078cf19f63b Mon Sep 17 00:00:00 2001 From: Jordan Date: Sat, 18 Dec 2021 16:45:53 -0700 Subject: web, db: add processed URL index, housekeeping --- README.md | 7 ++- db.go | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++---- db_test.go | 101 +++++++++++++++++++++++------- keep.go | 33 +++++++--- keep.json | 2 + web.go | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 487 insertions(+), 48 deletions(-) create mode 100644 web.go diff --git a/README.md b/README.md index c4a9ba2..17f19ac 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Keep -Keep is a minimal Discord bot which saves any URLs parsed from messages visible -to the configured account on the Wayback Machine. +Keep is a minimal Discord bot which archives URLs visible to the configured +account (sent by anyone, anywhere) on the Wayback Machine. A local cache of saved URLs is kept to prevent duplicate availability API requests. @@ -14,6 +14,9 @@ test`. ## Usage +Create `~/.keep`, copy and populate `keep.json`, then start `./keep`. An index +of processed URLs can be found at `127.0.0.1:9099`. + ``` Usage of ./keep: -config string diff --git a/db.go b/db.go index 21c68ae..d10f4ba 100644 --- a/db.go +++ b/db.go @@ -9,10 +9,22 @@ import ( _ "github.com/mattn/go-sqlite3" ) +type Entry struct { + ID int + Message Message + Status int +} + +type Stats struct { + Users int + Guilds int + Channels int + URLs int +} + func initDB(path string) *sql.DB { if _, err := os.Stat(path); errors.Is(err, os.ErrNotExist) { - log.Printf("Creating %s...\n", path) file, err := os.Create(path) if err != nil { log.Fatal(err) @@ -71,10 +83,10 @@ func initTables(db *sql.DB) { } } -func addArchived(db *sql.DB, m *Message, status_code int) { +func (db *SqliteDB) AddArchived(m *Message, status_code int) { // Start a transaction using default isolation - tx, err := db.Begin() + tx, err := db.db.Begin() if err != nil { log.Fatal(err) } @@ -94,14 +106,14 @@ func addArchived(db *sql.DB, m *Message, status_code int) { // Insert entry in URLs table using foreign key reference IDs _, err = tx.Exec(` INSERT OR IGNORE INTO - urls(url, user_string_id, guild_string_id, channel_string_id, status_code) - VALUES( - ?, - (SELECT id FROM users WHERE user_id = ?), - (SELECT id FROM guilds WHERE guild_id = ?), - (SELECT id FROM channels WHERE channel_id = ?), - ? - );`, m.URL, m.Author, m.Guild, m.Channel, status_code) + urls(url, user_string_id, guild_string_id, channel_string_id, status_code) + VALUES( + ?, + (SELECT id FROM users WHERE user_id = ?), + (SELECT id FROM guilds WHERE guild_id = ?), + (SELECT id FROM channels WHERE channel_id = ?), + ? + );`, m.URL, m.Author, m.Guild, m.Channel, status_code) if err != nil { log.Fatal(err) } @@ -112,10 +124,10 @@ func addArchived(db *sql.DB, m *Message, status_code int) { } } -func isCached(db *sql.DB, url string) (bool, int) { +func (db *SqliteDB) IsCached(url string) (bool, int) { var status_code int - err := db.QueryRow("SELECT status_code FROM urls WHERE url = ?", + err := db.db.QueryRow("SELECT status_code FROM urls WHERE url = ?", url).Scan(&status_code) switch { case err == sql.ErrNoRows: @@ -125,3 +137,150 @@ func isCached(db *sql.DB, url string) (bool, int) { } return true, status_code } + +func (db *SqliteDB) Stats() (*Stats, error) { + + var stats Stats + err := db.db.QueryRow(` + SELECT + (SELECT COUNT(*) FROM urls), + (SELECT COUNT(*) FROM users), + (SELECT COUNT(*) FROM guilds), + (SELECT COUNT(*) FROM channels) + ;`).Scan(&stats.URLs, &stats.Users, &stats.Guilds, &stats.Channels) + if err != nil { + return nil, err + } + return &stats, nil +} + +func (db *SqliteDB) ListEntries(limit int, offset int, user string, + guild string, channel string) (*[]Entry, error) { + + var rows *sql.Rows + var err error + if user == "" && guild == "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild == "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild != "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, guild, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild == "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild != "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, guild, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild != "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, guild, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild == "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild != "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, guild, limit, offset) + if err != nil { + return nil, err + } + } + defer rows.Close() + + var entries []Entry + for rows.Next() { + var e Entry + if err := rows.Scan(&e.ID, &e.Message.URL, &e.Message.Author, + &e.Message.Guild, &e.Message.Channel, &e.Status); err != nil { + return nil, err + } + entries = append(entries, e) + } + if err := rows.Err(); err != nil { + return nil, err + } + return &entries, nil +} diff --git a/db_test.go b/db_test.go index b429aaf..d2a1ee1 100644 --- a/db_test.go +++ b/db_test.go @@ -1,48 +1,103 @@ package main import ( - "database/sql" "io/ioutil" "net/http" "os" "testing" ) -var ( - db *sql.DB - db_path string -) - -func TestInitDB(t *testing.T) { +func TestDB(t *testing.T) { + // initDB() tmpDB, _ := ioutil.TempFile("", "tmp-*.db") - db_path = tmpDB.Name() + db_path := tmpDB.Name() os.Remove(db_path) - db = initDB(db_path) -} + db := &SqliteDB{db: initDB(db_path)} -func TestAddArchived(t *testing.T) { + // Cleanup temporary DB when test completes + t.Cleanup(func() { + os.Remove(db_path) + }) + // AddArchived() m := Message{ URL: "http://example.com/", Author: "000000000000000000", - Guild: "000000000000000000", - Channel: "000000000000000000", + Guild: "222222222222222222", + Channel: "111111111111111111", } - addArchived(db, &m, 200) -} - -func TestIsCached(t *testing.T) { + db.AddArchived(&m, 200) + m = Message{ + URL: "http://example.net/", + Author: "111111111111111111", + Guild: "222222222222222222", + Channel: "333333333333333333", + } + db.AddArchived(&m, 404) + // IsCached() url := "http://example.com/" - cached, status_code := isCached(db, url) + cached, status_code := db.IsCached(url) if status_code != http.StatusOK || cached != true { - t.Errorf("Received %t, %d; wanted %t, %d", cached, status_code, true, - http.StatusOK) + t.Errorf("IsCached(): Received %t, %d; wanted %t, %d", cached, + status_code, true, http.StatusOK) + } + url = "http://example.org/" + cached, status_code = db.IsCached(url) + if status_code != 0 || cached != false { + t.Errorf("IsCached(): Received %t, %d; wanted %t, %d", cached, + status_code, true, http.StatusOK) } -} -func TestDBCleanup(t *testing.T) { + // ListEntries() + e, err := db.ListEntries(10, 0, "", "", "") + if err != nil { + t.Error(err) + } + if len(*e) != 2 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "000000000000000000", "", "") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "", "222222222222222222", "") + if len(*e) != 2 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "", "", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "222222222222222222", "") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "222222222222222222", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } - os.Remove(db_path) + // Stats() + stats, err := db.Stats() + if err != nil { + t.Fatal(err) + } + statsExpected := &Stats{ + URLs: 2, + Users: 2, + Guilds: 1, + Channels: 2, + } + if stats == statsExpected { + t.Errorf("Stats(): Received %v; wanted %v", stats, statsExpected) + } } diff --git a/keep.go b/keep.go index 2a93f2c..8457ff2 100644 --- a/keep.go +++ b/keep.go @@ -25,6 +25,8 @@ type Config struct { Token string `json:"token"` Verbose bool `json:"verbose"` Ignore []string `json:"ignore"` + Host string `json:"host"` + Port string `json:"port"` } type Message struct { @@ -34,6 +36,10 @@ type Message struct { Channel string } +type SqliteDB struct { + db *sql.DB +} + var ( messageChan chan *Message config Config @@ -63,12 +69,19 @@ func main() { } // Create and initialize URL cache database - db := initDB(path.Join(keepDir, "keep.db")) + sqlSqliteDB := initDB(path.Join(keepDir, "keep.db")) + db := &SqliteDB{db: sqlSqliteDB} // Channel for passing URLs to the archive goroutine for archival messageChan = make(chan *Message, 25) go archiver(db) + // Start HTTP server + http.HandleFunc("/", db.IndexHandler) + log.Printf("Listening on %v port %v (http://%v:%v/)\n", config.Host, + config.Port, config.Host, config.Port) + go http.ListenAndServe(fmt.Sprintf("%s:%s", config.Host, config.Port), nil) + // Create a new Discord session using provided credentials dg, err := discordgo.New(config.Token) if err != nil { @@ -87,11 +100,11 @@ func main() { dg.Identify.Intents = discordgo.IntentsGuildMessages // Open a websocket connection to Discord and begin listening - err = dg.Open() - if err != nil { - fmt.Println("error opening connection,", err) - return - } + //err = dg.Open() + //if err != nil { + // fmt.Println("error opening connection,", err) + // return + //} // Wait here until CTRL-C or other term signal is received sc := make(chan os.Signal, 1) @@ -104,7 +117,7 @@ func main() { // archiver is intended to be run in its own goroutine, receiving URLs from main // over a shared channel for processing -func archiver(db *sql.DB) { +func archiver(db *SqliteDB) { // Each iteration removes and processes one url from the channel for { @@ -113,7 +126,7 @@ func archiver(db *sql.DB) { message := <-messageChan // Skip if we have URL in database - cached, _ := isCached(db, message.URL) + cached, _ := db.IsCached(message.URL) if cached { continue } @@ -121,14 +134,14 @@ func archiver(db *sql.DB) { // Skip if the Internet Archive already has a copy available archived, status_code := isArchived(message.URL) if archived && status_code == http.StatusOK { - addArchived(db, message, status_code) + db.AddArchived(message, status_code) log.Printf("SKIP %d %s", status_code, message.URL) continue } // Archive, URL is not present in cache or IA status_code = archive(message.URL) - addArchived(db, message, status_code) + db.AddArchived(message, status_code) log.Printf("SAVE %d %s", status_code, message.URL) // Limit requests to Wayback API to 5-second intervals diff --git a/keep.json b/keep.json index 1807efb..6c015ae 100644 --- a/keep.json +++ b/keep.json @@ -1,4 +1,6 @@ { + "host":"127.0.0.1", + "port":"9099", "token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1", "verbose":false, "ignore": [ diff --git a/web.go b/web.go new file mode 100644 index 0000000..c3c8d67 --- /dev/null +++ b/web.go @@ -0,0 +1,207 @@ +package main + +import ( + "html/template" + "log" + "net/http" + "net/url" + "strconv" +) + +type Resp struct { + Entries *[]Entry + Err error + Stats *Stats + URL string + Offset int + User string + Guild string + Channel string +} + +var funcMap = template.FuncMap{ + "add": add, + "minus": minus, + "setQuery": setQuery, + "getRoot": getRoot, + "intToStr": intToStr, +} + +const i = ` + + + + + + +
+

Keep

+

{{- .Err -}}

+

+ {{- .Stats.URLs -}} URLs, + {{- .Stats.Users -}} users, + {{- .Stats.Guilds -}} guilds, + {{- .Stats.Channels -}} channels +

+ +

+ {{- if or (ne .User "") (ne .Guild "") (ne .Channel "") -}} + Entries filtered by: + {{- end -}} + {{- if ne .User "" }} User ({{ .User -}}){{- end -}} + {{- if ne .Guild "" }} Guild ({{ .Guild -}}){{- end -}} + {{- if ne .Channel "" }} Channel ({{ .Channel -}}){{- end -}} +

+ + + + + + + + + + + + + {{- range $e := .Entries -}} + + + + + + {{- end -}} +
IDHTTPURL
{{- $e.ID -}}{{- if eq $e.Status 0 -}}PEND{{- else -}}{{ $e.Status }}{{- end -}}{{ $e.Message.URL }}
+
+ + + + ` + +var t = template.Must(template.New("").Funcs(funcMap).Parse(i)) + +func minus(a int, b int) int { + + return a - b +} + +func add(a int, b int) int { + + return a + b +} + +func intToStr(a int) string { + + return strconv.Itoa(a) +} + +func setQuery(urlStr string, query string, value string) string { + + u, _ := url.Parse(urlStr) + q := u.Query() + q.Set(query, value) + u.RawQuery = q.Encode() + return u.String() +} + +func getRoot(urlStr string) string { + + u, _ := url.Parse(urlStr) + u.RawQuery = "" + u.Fragment = "" + return u.String() +} + +func (db *SqliteDB) IndexHandler(w http.ResponseWriter, r *http.Request) { + + resp := Resp{} + resp.Stats, resp.Err = db.Stats() + if resp.Err != nil { + log.Println(resp.Err) + t.Execute(w, &resp) + return + } + + resp.URL = r.URL.String() + query := r.URL.Query() + + var err error + resp.Offset, err = strconv.Atoi(query.Get("offset")) + if err != nil { + resp.Offset = 0 + } + resp.User = query.Get("user") + resp.Guild = query.Get("guild") + resp.Channel = query.Get("channel") + + resp.Entries, resp.Err = db.ListEntries(100, resp.Offset, resp.User, + resp.Guild, resp.Channel) + if resp.Err != nil { + log.Println(resp.Err) + } + t.Execute(w, &resp) +} -- cgit v1.2.3-54-g00ecf