diff options
author | Jordan <me@jordan.im> | 2021-12-18 16:45:53 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2021-12-18 16:45:53 -0700 |
commit | 245076d017f26e12c162c08933fa3078cf19f63b (patch) | |
tree | 52d39fa92bfb100d9950c817cea6d10158ff119c | |
parent | 27584d82ad625428c0c3f0e9edb48d1f51647ec6 (diff) | |
download | keep-245076d017f26e12c162c08933fa3078cf19f63b.tar.gz keep-245076d017f26e12c162c08933fa3078cf19f63b.zip |
web, db: add processed URL index, housekeeping
-rw-r--r-- | README.md | 7 | ||||
-rw-r--r-- | db.go | 185 | ||||
-rw-r--r-- | db_test.go | 101 | ||||
-rw-r--r-- | keep.go | 33 | ||||
-rw-r--r-- | keep.json | 2 | ||||
-rw-r--r-- | web.go | 207 |
6 files changed, 487 insertions, 48 deletions
@@ -1,7 +1,7 @@ # Keep -Keep is a minimal Discord bot which saves any URLs parsed from messages visible -to the configured account on the Wayback Machine. +Keep is a minimal Discord bot which archives URLs visible to the configured +account (sent by anyone, anywhere) on the Wayback Machine. A local cache of saved URLs is kept to prevent duplicate availability API requests. @@ -14,6 +14,9 @@ test`. ## Usage +Create `~/.keep`, copy and populate `keep.json`, then start `./keep`. An index +of processed URLs can be found at `127.0.0.1:9099`. + ``` Usage of ./keep: -config string @@ -9,10 +9,22 @@ import ( _ "github.com/mattn/go-sqlite3" ) +type Entry struct { + ID int + Message Message + Status int +} + +type Stats struct { + Users int + Guilds int + Channels int + URLs int +} + func initDB(path string) *sql.DB { if _, err := os.Stat(path); errors.Is(err, os.ErrNotExist) { - log.Printf("Creating %s...\n", path) file, err := os.Create(path) if err != nil { log.Fatal(err) @@ -71,10 +83,10 @@ func initTables(db *sql.DB) { } } -func addArchived(db *sql.DB, m *Message, status_code int) { +func (db *SqliteDB) AddArchived(m *Message, status_code int) { // Start a transaction using default isolation - tx, err := db.Begin() + tx, err := db.db.Begin() if err != nil { log.Fatal(err) } @@ -94,14 +106,14 @@ func addArchived(db *sql.DB, m *Message, status_code int) { // Insert entry in URLs table using foreign key reference IDs _, err = tx.Exec(` INSERT OR IGNORE INTO - urls(url, user_string_id, guild_string_id, channel_string_id, status_code) - VALUES( - ?, - (SELECT id FROM users WHERE user_id = ?), - (SELECT id FROM guilds WHERE guild_id = ?), - (SELECT id FROM channels WHERE channel_id = ?), - ? - );`, m.URL, m.Author, m.Guild, m.Channel, status_code) + urls(url, user_string_id, guild_string_id, channel_string_id, status_code) + VALUES( + ?, + (SELECT id FROM users WHERE user_id = ?), + (SELECT id FROM guilds WHERE guild_id = ?), + (SELECT id FROM channels WHERE channel_id = ?), + ? + );`, m.URL, m.Author, m.Guild, m.Channel, status_code) if err != nil { log.Fatal(err) } @@ -112,10 +124,10 @@ func addArchived(db *sql.DB, m *Message, status_code int) { } } -func isCached(db *sql.DB, url string) (bool, int) { +func (db *SqliteDB) IsCached(url string) (bool, int) { var status_code int - err := db.QueryRow("SELECT status_code FROM urls WHERE url = ?", + err := db.db.QueryRow("SELECT status_code FROM urls WHERE url = ?", url).Scan(&status_code) switch { case err == sql.ErrNoRows: @@ -125,3 +137,150 @@ func isCached(db *sql.DB, url string) (bool, int) { } return true, status_code } + +func (db *SqliteDB) Stats() (*Stats, error) { + + var stats Stats + err := db.db.QueryRow(` + SELECT + (SELECT COUNT(*) FROM urls), + (SELECT COUNT(*) FROM users), + (SELECT COUNT(*) FROM guilds), + (SELECT COUNT(*) FROM channels) + ;`).Scan(&stats.URLs, &stats.Users, &stats.Guilds, &stats.Channels) + if err != nil { + return nil, err + } + return &stats, nil +} + +func (db *SqliteDB) ListEntries(limit int, offset int, user string, + guild string, channel string) (*[]Entry, error) { + + var rows *sql.Rows + var err error + if user == "" && guild == "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild == "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild != "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, guild, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild == "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user != "" && guild != "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE user_string_id = (SELECT id FROM users WHERE user_id = ?) + AND guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, user, guild, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild != "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + AND channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, guild, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild == "" && channel != "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE channel_string_id = (SELECT id FROM channels WHERE channel_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, channel, limit, offset) + if err != nil { + return nil, err + } + } else if user == "" && guild != "" && channel == "" { + rows, err = db.db.Query(` + SELECT urls.id, urls.url, users.user_id, guilds.guild_id, channels.channel_id, status_code + FROM urls + INNER JOIN users ON users.id = urls.user_string_id + INNER JOIN guilds ON guilds.id = urls.guild_string_id + INNER JOIN channels ON channels.id = urls.channel_string_id + WHERE guild_string_id = (SELECT id FROM guilds WHERE guild_id = ?) + ORDER BY urls.id DESC + LIMIT ? OFFSET ?;`, guild, limit, offset) + if err != nil { + return nil, err + } + } + defer rows.Close() + + var entries []Entry + for rows.Next() { + var e Entry + if err := rows.Scan(&e.ID, &e.Message.URL, &e.Message.Author, + &e.Message.Guild, &e.Message.Channel, &e.Status); err != nil { + return nil, err + } + entries = append(entries, e) + } + if err := rows.Err(); err != nil { + return nil, err + } + return &entries, nil +} @@ -1,48 +1,103 @@ package main import ( - "database/sql" "io/ioutil" "net/http" "os" "testing" ) -var ( - db *sql.DB - db_path string -) - -func TestInitDB(t *testing.T) { +func TestDB(t *testing.T) { + // initDB() tmpDB, _ := ioutil.TempFile("", "tmp-*.db") - db_path = tmpDB.Name() + db_path := tmpDB.Name() os.Remove(db_path) - db = initDB(db_path) -} + db := &SqliteDB{db: initDB(db_path)} -func TestAddArchived(t *testing.T) { + // Cleanup temporary DB when test completes + t.Cleanup(func() { + os.Remove(db_path) + }) + // AddArchived() m := Message{ URL: "http://example.com/", Author: "000000000000000000", - Guild: "000000000000000000", - Channel: "000000000000000000", + Guild: "222222222222222222", + Channel: "111111111111111111", } - addArchived(db, &m, 200) -} - -func TestIsCached(t *testing.T) { + db.AddArchived(&m, 200) + m = Message{ + URL: "http://example.net/", + Author: "111111111111111111", + Guild: "222222222222222222", + Channel: "333333333333333333", + } + db.AddArchived(&m, 404) + // IsCached() url := "http://example.com/" - cached, status_code := isCached(db, url) + cached, status_code := db.IsCached(url) if status_code != http.StatusOK || cached != true { - t.Errorf("Received %t, %d; wanted %t, %d", cached, status_code, true, - http.StatusOK) + t.Errorf("IsCached(): Received %t, %d; wanted %t, %d", cached, + status_code, true, http.StatusOK) + } + url = "http://example.org/" + cached, status_code = db.IsCached(url) + if status_code != 0 || cached != false { + t.Errorf("IsCached(): Received %t, %d; wanted %t, %d", cached, + status_code, true, http.StatusOK) } -} -func TestDBCleanup(t *testing.T) { + // ListEntries() + e, err := db.ListEntries(10, 0, "", "", "") + if err != nil { + t.Error(err) + } + if len(*e) != 2 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "000000000000000000", "", "") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "", "222222222222222222", "") + if len(*e) != 2 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "", "", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "222222222222222222", "") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } + e, err = db.ListEntries(10, 0, "111111111111111111", "222222222222222222", "333333333333333333") + if len(*e) != 1 { + t.Errorf("ListEntries(): Recieved length %d; wanted %d", len(*e), 2) + } - os.Remove(db_path) + // Stats() + stats, err := db.Stats() + if err != nil { + t.Fatal(err) + } + statsExpected := &Stats{ + URLs: 2, + Users: 2, + Guilds: 1, + Channels: 2, + } + if stats == statsExpected { + t.Errorf("Stats(): Received %v; wanted %v", stats, statsExpected) + } } @@ -25,6 +25,8 @@ type Config struct { Token string `json:"token"` Verbose bool `json:"verbose"` Ignore []string `json:"ignore"` + Host string `json:"host"` + Port string `json:"port"` } type Message struct { @@ -34,6 +36,10 @@ type Message struct { Channel string } +type SqliteDB struct { + db *sql.DB +} + var ( messageChan chan *Message config Config @@ -63,12 +69,19 @@ func main() { } // Create and initialize URL cache database - db := initDB(path.Join(keepDir, "keep.db")) + sqlSqliteDB := initDB(path.Join(keepDir, "keep.db")) + db := &SqliteDB{db: sqlSqliteDB} // Channel for passing URLs to the archive goroutine for archival messageChan = make(chan *Message, 25) go archiver(db) + // Start HTTP server + http.HandleFunc("/", db.IndexHandler) + log.Printf("Listening on %v port %v (http://%v:%v/)\n", config.Host, + config.Port, config.Host, config.Port) + go http.ListenAndServe(fmt.Sprintf("%s:%s", config.Host, config.Port), nil) + // Create a new Discord session using provided credentials dg, err := discordgo.New(config.Token) if err != nil { @@ -87,11 +100,11 @@ func main() { dg.Identify.Intents = discordgo.IntentsGuildMessages // Open a websocket connection to Discord and begin listening - err = dg.Open() - if err != nil { - fmt.Println("error opening connection,", err) - return - } + //err = dg.Open() + //if err != nil { + // fmt.Println("error opening connection,", err) + // return + //} // Wait here until CTRL-C or other term signal is received sc := make(chan os.Signal, 1) @@ -104,7 +117,7 @@ func main() { // archiver is intended to be run in its own goroutine, receiving URLs from main // over a shared channel for processing -func archiver(db *sql.DB) { +func archiver(db *SqliteDB) { // Each iteration removes and processes one url from the channel for { @@ -113,7 +126,7 @@ func archiver(db *sql.DB) { message := <-messageChan // Skip if we have URL in database - cached, _ := isCached(db, message.URL) + cached, _ := db.IsCached(message.URL) if cached { continue } @@ -121,14 +134,14 @@ func archiver(db *sql.DB) { // Skip if the Internet Archive already has a copy available archived, status_code := isArchived(message.URL) if archived && status_code == http.StatusOK { - addArchived(db, message, status_code) + db.AddArchived(message, status_code) log.Printf("SKIP %d %s", status_code, message.URL) continue } // Archive, URL is not present in cache or IA status_code = archive(message.URL) - addArchived(db, message, status_code) + db.AddArchived(message, status_code) log.Printf("SAVE %d %s", status_code, message.URL) // Limit requests to Wayback API to 5-second intervals @@ -1,4 +1,6 @@ { + "host":"127.0.0.1", + "port":"9099", "token":"YXiHglqrSrEXRSIX83PhbPxskICaEOFTiUo757i57o1ffk67Zgb2qORhLq1", "verbose":false, "ignore": [ @@ -0,0 +1,207 @@ +package main + +import ( + "html/template" + "log" + "net/http" + "net/url" + "strconv" +) + +type Resp struct { + Entries *[]Entry + Err error + Stats *Stats + URL string + Offset int + User string + Guild string + Channel string +} + +var funcMap = template.FuncMap{ + "add": add, + "minus": minus, + "setQuery": setQuery, + "getRoot": getRoot, + "intToStr": intToStr, +} + +const i = ` + <!DOCTYPE HTML> + <html> + <head> + <style> + body { + display: block; + font-family: monospace; + white-space: wrap; + } + div{ + margin: 1em 0; + max-width: 70em; + margin-top: 0.5em; + margin-bottom: 0.5em; + margin-left: auto; + margin-right: auto; + } + table, th, td { + border: 1px solid black; + border-collapse: collapse; + } + th, td { + padding: .25em; + } + table { + table-layout: fixed; + width: 100%; + } + th { + text-align: left; + word-break: break-all; + } + td { + vertical-align: top; + text-align: left; + word-break: break-all; + } + #navigate { + display: flex; + justify-content: space-between; + } + </style> + </head> + <body> + <div> + <h1 style="text-align:center;">Keep</h1> + <p style="text-align: center;">{{- .Err -}}</p> + <p style="text-align: center;"> + <b>{{- .Stats.URLs -}}</b> URLs, + <b>{{- .Stats.Users -}}</b> users, + <b>{{- .Stats.Guilds -}}</b> guilds, + <b>{{- .Stats.Channels -}}</b> channels + </p> + <div id="navigate"> + <form action="" method="get"> + <label for="user">User:</label> + <input type="text" id="user" name="user"> + <label for="guild">Guild:</label> + <input type="text" id="guild" name="guild"> + <label for="channel">Channel:</label> + <input type="text" id="channel" name="channel"> + <input type="submit" value="Filter"> + </form> + </div> + <p style="text-align: center;"> + {{- if or (ne .User "") (ne .Guild "") (ne .Channel "") -}} + Entries filtered by: + {{- end -}} + {{- if ne .User "" }} <b>User</b> ({{ .User -}}){{- end -}} + {{- if ne .Guild "" }} <b>Guild</b> ({{ .Guild -}}){{- end -}} + {{- if ne .Channel "" }} <b>Channel</b> ({{ .Channel -}}){{- end -}} + </p> + <div id="navigate"> + {{- if gt .Offset 0 -}} + <a href="{{ setQuery .URL "offset" (intToStr (minus .Offset 100)) }}">Previous</a> + {{- end -}} + <a href="{{ getRoot .URL }}">Home</a> + {{- if ge (len .Entries) 100 -}} + <a href="{{ setQuery .URL "offset" (intToStr (add .Offset 100)) }}">Next</a> + {{- end -}} + </div> + <table> + <colgroup> + <col span="1" style="width: 7%;"> + <col span="1" style="width: 5%;"> + <col span="1" style="width: 87%;"> + </colgroup> + <tr> + <th>ID</th> + <th>HTTP</th> + <th>URL</th> + </tr> + {{- range $e := .Entries -}} + <tr> + <td>{{- $e.ID -}}</td> + <td>{{- if eq $e.Status 0 -}}PEND{{- else -}}{{ $e.Status }}{{- end -}}</td> + <td><a href="{{ $e.Message.URL }}">{{ $e.Message.URL }}</a></td> + </tr> + {{- end -}} + </table> + </div> + <div id="navigate"> + {{- if gt .Offset 0 -}} + <a href="{{ setQuery .URL "offset" (intToStr (minus .Offset 100)) }}">Previous</a> + {{- end -}} + <a href="{{ getRoot .URL }}">Home</a> + {{- if ge (len .Entries) 100 -}} + <a href="{{ setQuery .URL "offset" (intToStr (add .Offset 100)) }}">Next</a> + {{- end -}} + </div> + </body> + </html> + ` + +var t = template.Must(template.New("").Funcs(funcMap).Parse(i)) + +func minus(a int, b int) int { + + return a - b +} + +func add(a int, b int) int { + + return a + b +} + +func intToStr(a int) string { + + return strconv.Itoa(a) +} + +func setQuery(urlStr string, query string, value string) string { + + u, _ := url.Parse(urlStr) + q := u.Query() + q.Set(query, value) + u.RawQuery = q.Encode() + return u.String() +} + +func getRoot(urlStr string) string { + + u, _ := url.Parse(urlStr) + u.RawQuery = "" + u.Fragment = "" + return u.String() +} + +func (db *SqliteDB) IndexHandler(w http.ResponseWriter, r *http.Request) { + + resp := Resp{} + resp.Stats, resp.Err = db.Stats() + if resp.Err != nil { + log.Println(resp.Err) + t.Execute(w, &resp) + return + } + + resp.URL = r.URL.String() + query := r.URL.Query() + + var err error + resp.Offset, err = strconv.Atoi(query.Get("offset")) + if err != nil { + resp.Offset = 0 + } + resp.User = query.Get("user") + resp.Guild = query.Get("guild") + resp.Channel = query.Get("channel") + + resp.Entries, resp.Err = db.ListEntries(100, resp.Offset, resp.User, + resp.Guild, resp.Channel) + if resp.Err != nil { + log.Println(resp.Err) + } + t.Execute(w, &resp) +} |