initial commit

author: Jordan <me@jordan.im> 2021-02-16 16:48:56 -0700
committer: Jordan <me@jordan.im> 2021-02-16 16:48:56 -0700
commit: baa10aeb413a5ac109e56db5fd7a8d62a4d5965f (patch)
tree: b063d3df79882cb454329e6469088b00d382113f
download: crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.tar.gz
crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.zip
13 files changed, 1524 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b4938ff
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.swp
+*.swo
+papers
+crane
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4eb8879
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+.POSIX:
+.SUFFIXES:
+
+GO = go
+RM = rm
+GOFLAGS =
+PREFIX = /usr/local
+BINDIR = $(PREFIX)/bin
+SHAREDIR = $(PREFIX)/share/crane
+
+goflags = $(GOFLAGS)
+
+all: crane
+
+crane:
+	$(GO) build $(goflags) -ldflags "-X main.buildPrefix=$(PREFIX)"
+
+clean:
+	$(RM) -f crane
+
+install: all
+	mkdir -p $(DESTDIR)$(BINDIR)
+	mkdir -p $(DESTDIR)$(SHAREDIR)
+	cp -f crane $(DESTDIR)$(BINDIR)
+	cp -R templates $(DESTDIR)$(SHAREDIR)
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d06e6de
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# Crane
+
+Crane is a minimal self-hosted research literature organizational web service
+with support for paper download and metadata retrieval.
+
+No databases or app-proprietary formats are used. Papers are categorized by the
+directories in which they're stored, and XML metadata is retrieved from the
+[doi.org](https://www.doi.org/) API and written alongside each paper for which
+its DOI is known.
+
+![admin](screenshots/admin.png)
+
+## Installation
+
+Crane can be compiled with `make` or `go build`, and installed system-wide by
+running `make install` with root-level permissions.
+
+## Usage
+
+Crane can be run locally or on a server. The index (`"/"`) endpoint lists papers
+but does not permit modification of the set. The admin (`"/admin/"`) endpoint
+supports optional authentication and permits paper download, deletion, and
+moving between categories, as well as category addition, deletion, and rename.
+
+```
+Usage of ./crane:
+  -host string
+        IP address to listen on (default "127.0.0.1")
+  -port uint
+        Port to listen on (default 9090)
+  -path string
+        Absolute or relative path to papers folder (default "./papers")
+  -sci-hub string
+        Sci-Hub URL (default "https://sci-hub.se/")
+  -user string
+        Username for /admin/ endpoints (optional)
+  -pass string
+        Password for /admin/ endpoints (optional)
+```
+
+By default, crane listens on `127.0.0.1:9090` but this is configurable with the
+`--host` and `--port` parameters. Authentication is optional but can be enabled
+with `--user` and `--pass` parameters; the index is always publicly accessible.
+
+Papers are written to `--path`, stored in directories which serve as paper
+categories.
diff --git a/crane.go b/crane.go
new file mode 100644
index 0000000..983a4b9
--- /dev/null
+++ b/crane.go
@@ -0,0 +1,740 @@
+package main
+
+import (
+	"bufio"
+	"context"
+	"encoding/xml"
+	"errors"
+	"flag"
+	"fmt"
+	"html/template"
+	"io/ioutil"
+	"log"
+	"mime"
+	"net"
+	"net/http"
+	"net/http/cookiejar"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/net/publicsuffix"
+)
+
+const MAX_SIZE int64 = 50000000 // max incoming HTTP request body size (50MB)
+
+// Process-wide settings and shared state; the flag-backed values are filled
+// in by main() before the HTTP server starts.
+var (
+	client      *http.Client // outbound HTTP client, built in main() with a cookie jar
+	scihubURL   string       // -sci-hub flag: base URL used to fetch papers by DOI
+	host        string       // -host flag: IP address to listen on
+	port        uint64       // -port flag: port to listen on
+	user        string       // -user flag: optional username for the admin endpoints
+	pass        string       // -pass flag: optional password for the admin endpoints
+	buildPrefix string       // install prefix; the Makefile sets it via -ldflags "-X main.buildPrefix=..."
+	templateDir string       // directory holding the HTML templates, resolved in main()
+)
+
+// Contributor is one person_name element decoded from a paper's XML
+// metadata record.
+type Contributor struct {
+	FirstName string `xml:"given_name"`
+	LastName  string `xml:"surname"`
+	Role      string `xml:"contributor_role,attr"`
+	Sequence  string `xml:"sequence,attr"` // "first" identifies the main author used for file naming
+}
+
+// Meta holds the fields decoded from a paper's doi.org XML metadata, whose
+// root element is doi_records. Every field is taken from the journal (or
+// journal_article) subtree of the record and is left empty when the element
+// is absent.
+type Meta struct {
+	XMLName      xml.Name      `xml:"doi_records"`
+	Journal      string        `xml:"doi_record>crossref>journal>journal_metadata>full_title"`
+	ISSN         string        `xml:"doi_record>crossref>journal>journal_metadata>issn"`
+	Title        string        `xml:"doi_record>crossref>journal>journal_article>titles>title"`
+	Contributors []Contributor `xml:"doi_record>crossref>journal>journal_article>contributors>person_name"`
+	PubYear      string        `xml:"doi_record>crossref>journal>journal_article>publication_date>year"`
+	PubMonth     string        `xml:"doi_record>crossref>journal>journal_article>publication_date>month"`
+	FirstPage    string        `xml:"doi_record>crossref>journal>journal_article>pages>first_page"`
+	LastPage     string        `xml:"doi_record>crossref>journal>journal_article>pages>last_page"`
+	DOI          string        `xml:"doi_record>crossref>journal>journal_article>doi_data>doi"`
+	Resource     string        `xml:"doi_record>crossref>journal>journal_article>doi_data>resource"`
+}
+
+// Paper describes a single stored PDF and its optional XML metadata.
+type Paper struct {
+	Meta      Meta   // decoded metadata; zero value when no metadata file exists
+	MetaPath  string // filesystem path of the .meta.xml file; empty when absent
+	PaperName string // file name without the .pdf extension, e.g. doe2020
+	PaperPath string // filesystem path of the PDF beneath Papers.Path
+}
+
+// Papers is the in-memory index of the papers directory.
+type Papers struct {
+	// List maps a category name (directory relative to Path) to its papers,
+	// each keyed by the paper's path relative to Path, e.g.
+	// Mathematics/example2020.pdf
+	List map[string]map[string]*Paper
+	Path string // root papers directory; main() converts it to an absolute path
+}
+
+// Resp is the data handed to the HTML templates when rendering a page.
+type Resp struct {
+	Papers           map[string]map[string]*Paper // the Papers.List set to render
+	Status           string                       // result message of the last action
+	LastPaperDL      string                       // relative path of the paper just downloaded, used for its download link
+	LastUsedCategory string                       // category to preselect in the download form
+}
+
+// getPaperFileNameFromMeta returns the built filename (absent an extension)
+// from doi.org metadata, consisting of the lowercase last name of the first
+// author followed by the year of publication (e.g. doe2020).
+//
+// Path separators and ".." sequences are stripped from both components so the
+// result is safe to use as a file name. An empty string is returned when the
+// metadata lacks a usable first author or publication year.
+func getPaperFileNameFromMeta(p *Meta) string {
+	// separators are removed first so that removing them cannot re-create a
+	// ".." sequence afterwards (e.g. "./.")
+	sanitize := func(s string) string {
+		s = strings.Replace(s, "/", "", -1)
+		return strings.Replace(s, "..", "", -1)
+	}
+
+	var mainAuthor string
+	for _, contributor := range p.Contributors {
+		if contributor.Sequence == "first" {
+			mainAuthor = sanitize(contributor.LastName)
+			break
+		}
+	}
+	pubYear := sanitize(p.PubYear)
+	if mainAuthor == "" || pubYear == "" {
+		return ""
+	}
+	return strings.ToLower(mainAuthor) + pubYear
+}
+
+// getPaperFileNameFromResp returns the name of the file present at resp taken
+// first from content-disposition (if exists) then its destination URL
+// following redirects; e.g. doe2020
+//
+// The header value is supplied by the remote server, so only its final path
+// element is kept; this prevents a crafted filename such as "../../x.pdf" from
+// escaping the category directory. A trailing ".pdf" is removed.
+func getPaperFileNameFromResp(resp *http.Response) string {
+	var filename string
+	if disp := resp.Header.Get("Content-Disposition"); disp != "" {
+		// a malformed header yields nil params and falls through to the URL
+		_, params, _ := mime.ParseMediaType(disp)
+		if f := params["filename"]; f != "" {
+			filename = filepath.Base(f)
+		}
+	}
+	if filename == "" {
+		filename = strings.TrimSuffix(filepath.Base(resp.Request.URL.Path), "/")
+	}
+	return strings.TrimSuffix(filename, ".pdf")
+}
+
+// getUniqueName ensures the provided paper name is unique within category c.
+// If name is already taken, "-2", "-3", ... is appended to the original name
+// (doe2020-2, doe2020-3, ...) until an unused name is found and returned.
+func (papers *Papers) getUniqueName(c string, name string) string {
+	candidate := name
+	for ext := 2; ; ext++ {
+		k := filepath.Join(c, candidate+".pdf")
+		if _, exists := papers.List[c][k]; !exists {
+			return candidate
+		}
+		// always derive the candidate from the original name so suffixes do
+		// not pile up (doe2020-2-3)
+		candidate = fmt.Sprintf("%s-%d", name, ext)
+	}
+}
+
+// findPapersWalk is a WalkFunc passed to filepath.Walk() to process papers
+// stored on the filesystem. Every directory below papers.Path is registered
+// as a category, and every .pdf file is added to its category together with
+// the metadata decoded from the sibling .meta.xml file, if one exists.
+//
+// A non-nil error (walk error, stat failure or unreadable/malformed metadata)
+// aborts the walk.
+func (papers *Papers) findPapersWalk(path string, info os.FileInfo, err error) error {
+	// filepath.Walk reports failures to read an entry through err
+	if err != nil {
+		return err
+	}
+
+	// skip the papers.Path root directory
+	if p, _ := filepath.Abs(path); p == papers.Path {
+		return nil
+	}
+
+	// derive category name (e.g. Mathematics) from directory name; used as key
+	i, err := os.Stat(path)
+	if err != nil {
+		return err
+	}
+	dir := path
+	if !i.IsDir() {
+		dir = filepath.Dir(path)
+	}
+	c := strings.TrimPrefix(dir, papers.Path+"/")
+	if _, exists := papers.List[c]; !exists {
+		papers.List[c] = make(map[string]*Paper)
+	}
+
+	// now that category was added, ensure file is actually a PDF
+	if filepath.Ext(path) != ".pdf" {
+		return nil
+	}
+
+	var p Paper
+	p.PaperName = strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
+	p.PaperPath = filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".pdf"))
+
+	// XML metadata is not required but highly recommended; PDFs aren't parsed
+	// so it's our only source of metadata at the moment
+	//
+	// PDF parsing looks (and probably is) fairly annoying to support and might
+	// be better handled by an external script
+	metaPath := filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".meta.xml"))
+	if _, err := os.Stat(metaPath); err == nil {
+		p.MetaPath = metaPath
+
+		f, err := os.Open(p.MetaPath)
+		if err != nil {
+			return err
+		}
+
+		// streaming decode is memory-efficient relative to ioutil.ReadAll();
+		// the file is closed on both the success and the failure path
+		decodeErr := xml.NewDecoder(bufio.NewReader(f)).Decode(&p.Meta)
+		closeErr := f.Close()
+		if decodeErr != nil {
+			return decodeErr
+		}
+		if closeErr != nil {
+			return closeErr
+		}
+	}
+
+	// finally add paper to papers.List set; the subkey is the paper path
+	// relative to papers.Path, e.g. Mathematics/example2020.pdf
+	papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p
+	return nil
+}
+
+// PopulatePapers walks papers.Path with filepath.Walk() and fills the papers
+// set with every category and paper discovered; the first error hit during
+// the walk is returned.
+func (papers *Papers) PopulatePapers() error {
+	return filepath.Walk(papers.Path, papers.findPapersWalk)
+}
+
+// NewPaperFromDirectLink contains routines used to retrieve papers from remote
+// endpoints provided a direct link's http.Response. The response body is saved
+// to a temporary file, moved into category c under a unique name and
+// registered in the papers.List set.
+//
+// An error is returned if category c is unknown or if any filesystem step
+// fails; the temporary file is removed in every case.
+func (papers *Papers) NewPaperFromDirectLink(resp *http.Response, c string) (*Paper, error) {
+	// writing into a missing category would panic on the nil map below, and
+	// c is used to build the destination path
+	if _, exists := papers.List[c]; !exists {
+		return &Paper{}, fmt.Errorf("category %q does not exist", c)
+	}
+
+	tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf")
+	if err != nil {
+		return &Paper{}, err
+	}
+	// registered immediately so the file is cleaned up on every error path;
+	// after a successful rename the removal is a harmless no-op
+	defer os.Remove(tmpPDF.Name())
+
+	saveErr := saveRespBody(resp, tmpPDF.Name())
+	closeErr := tmpPDF.Close()
+	if saveErr != nil {
+		return &Paper{}, saveErr
+	}
+	if closeErr != nil {
+		return &Paper{}, closeErr
+	}
+
+	var p Paper
+	p.PaperName = papers.getUniqueName(c, getPaperFileNameFromResp(resp))
+	p.PaperPath = filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".pdf"))
+
+	if err := renameFile(tmpPDF.Name(), p.PaperPath); err != nil {
+		return nil, err
+	}
+	papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p
+	return &p, nil
+}
+
+// NewPaperFromDOI contains routines used to retrieve papers from remote
+// endpoints provided a DOI. Metadata is fetched first and used to name the
+// paper (e.g. doe2020); the PDF is then requested from scihubURL and both
+// files are moved into category c and registered in the papers.List set.
+//
+// An error is returned if category c is unknown, if a paper with the same
+// name and DOI already exists in c, or if any download or filesystem step
+// fails.
+func (papers *Papers) NewPaperFromDOI(doi []byte, c string) (*Paper, error) {
+	// writing into a missing category would panic on the nil map below, and
+	// c is used to build the destination paths
+	if _, exists := papers.List[c]; !exists {
+		return nil, fmt.Errorf("category %q does not exist", c)
+	}
+
+	tmpXML, err := getMetaFromDOI(client, doi)
+	if err != nil {
+		return nil, err
+	}
+	defer os.Remove(tmpXML)
+
+	// open temporary XML file for parsing
+	f, err := os.Open(tmpXML)
+	if err != nil {
+		return nil, err
+	}
+
+	// populate p struct with values derived from doi.org metadata; the file
+	// is closed whether or not decoding succeeds
+	var p Paper
+	decodeErr := xml.NewDecoder(bufio.NewReader(f)).Decode(&p.Meta)
+	closeErr := f.Close()
+	if decodeErr != nil {
+		return nil, decodeErr
+	}
+	if closeErr != nil {
+		return nil, closeErr
+	}
+
+	n := getPaperFileNameFromMeta(&p.Meta) // doe2020
+	if n == "" {
+		// last-resort condition if metadata lacking author or publication
+		// year; separators go first so stripping them cannot form a new ".."
+		n = strings.Replace(string(doi), "/", "", -1)
+		n = strings.Replace(n, "..", "", -1)
+	}
+	u := papers.getUniqueName(c, n) // doe2020-(2, 3, 4...) if n already exists
+
+	// if not matching, check if DOIs match (genuine duplicate)
+	if n != u {
+		k := filepath.Join(c, n+".pdf")
+		if prev, exists := papers.List[c][k]; exists && p.Meta.DOI == prev.Meta.DOI {
+			return nil, fmt.Errorf("paper %q with DOI %q already downloaded", n, string(doi))
+		}
+	}
+
+	p.PaperName = u
+	p.PaperPath = filepath.Join(filepath.Join(papers.Path, c), p.PaperName+".pdf")
+	p.MetaPath = filepath.Join(filepath.Join(papers.Path, c), p.PaperName+".meta.xml")
+
+	// parse scihubURL and join it w/ the DOI (accounts for no trailing slash)
+	target, err := url.Parse(scihubURL)
+	if err != nil {
+		return nil, err
+	}
+	target.Path = filepath.Join(target.Path, string(doi))
+
+	// make outbound request to sci-hub, save paper to temporary location
+	tmpPDF, err := getPaper(client, target.String())
+	if err != nil {
+		return nil, err
+	}
+	defer os.Remove(tmpPDF)
+
+	if err := renameFile(tmpPDF, p.PaperPath); err != nil {
+		return nil, err
+	}
+	if err := renameFile(tmpXML, p.MetaPath); err != nil {
+		return nil, err
+	}
+	papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p
+	return &p, nil
+}
+
+// DeletePaper deletes the provided paper and its metadata from the filesystem
+// and the papers.List set. p is the paper's key in the set, i.e. its path
+// relative to papers.Path (e.g. Mathematics/example2020.pdf).
+//
+// An error is returned if the category or the paper is unknown, or if a file
+// cannot be removed.
+func (papers *Papers) DeletePaper(p string) error {
+	// check if the category in which the paper is said to belong
+	// exists
+	c := filepath.Dir(p)
+	if _, exists := papers.List[c]; !exists {
+		return fmt.Errorf("category %q does not exist\n", c)
+	}
+
+	// check if paper exists in the provided category
+	paper, exists := papers.List[c][p]
+	if !exists {
+		return fmt.Errorf("paper %q does not exist in category %q\n", p, c)
+	}
+
+	// paper and category exists and the paper belongs to the provided
+	// category; remove it and its XML metadata
+	if err := os.Remove(paper.PaperPath); err != nil {
+		return err
+	}
+
+	// XML metadata optional; delete it if it exists
+	if paper.MetaPath != "" {
+		if _, err := os.Stat(paper.MetaPath); err == nil {
+			if err := os.Remove(paper.MetaPath); err != nil {
+				return err
+			}
+		}
+	}
+	delete(papers.List[c], p)
+	return nil
+}
+
+// DeleteCategory removes category c, everything stored beneath it on the
+// filesystem, and the matching entries (c itself plus all of its
+// subcategories) from the papers.List set.
+func (papers *Papers) DeleteCategory(c string) error {
+	if _, known := papers.List[c]; !known {
+		return fmt.Errorf("category %q does not exist in the set\n", c)
+	}
+	if err := os.RemoveAll(filepath.Join(papers.Path, c)); err != nil {
+		return err
+	}
+	// drop the category and every subcategory nested under it in one pass
+	nested := c + "/"
+	for name := range papers.List {
+		if name == c || strings.HasPrefix(name, nested) {
+			delete(papers.List, name)
+		}
+	}
+	return nil
+}
+
+// MovePaper moves the provided paper to the destination category on the
+// filesystem and the papers.List set. p is the paper's current key (its path
+// relative to papers.Path) and c is the destination category.
+//
+// An error is returned if either category or the paper is unknown, if a paper
+// with the same file name already exists in c, or if a rename fails.
+func (papers *Papers) MovePaper(p string, c string) error {
+	cPrev := filepath.Dir(p)
+	if _, exists := papers.List[cPrev]; !exists {
+		return fmt.Errorf("category %q does not exist\n", cPrev)
+	}
+	if _, exists := papers.List[c]; !exists {
+		return fmt.Errorf("category %q does not exist\n", c)
+	}
+	paper, exists := papers.List[cPrev][p]
+	if !exists {
+		return fmt.Errorf("paper %q does not exist in category %q\n", p, cPrev)
+	}
+
+	// the collision check must use the key the paper will have in the
+	// destination; looking up the source key there never matches and would
+	// let os.Rename overwrite an existing paper
+	destKey := filepath.Join(c, filepath.Base(p))
+	if _, exists := papers.List[c][destKey]; exists {
+		return fmt.Errorf("paper %q exists in destination category %q\n", p, c)
+	}
+
+	paperDest := filepath.Join(filepath.Join(papers.Path, c), paper.PaperName+".pdf")
+	if err := os.Rename(paper.PaperPath, paperDest); err != nil {
+		return err
+	}
+	// the PDF has moved; update the set right away so it stays consistent
+	// with the filesystem even if the metadata move below fails
+	paper.PaperPath = paperDest
+	papers.List[c][destKey] = paper
+	delete(papers.List[cPrev], p)
+
+	// XML metadata optional; move it if it exists
+	if paper.MetaPath != "" {
+		if _, err := os.Stat(paper.MetaPath); err == nil {
+			metaDest := filepath.Join(filepath.Join(papers.Path, c), paper.PaperName+".meta.xml")
+			if err := os.Rename(paper.MetaPath, metaDest); err != nil {
+				return err
+			}
+			paper.MetaPath = metaDest
+		}
+	}
+	return nil
+}
+
+// RenameCategory renames the provided category on the filesystem and the
+// paper.List set
+func (papers *Papers) RenameCategory(c string, d string) error {
+	if _, exists := papers.List[c]; exists != true {
+		return fmt.Errorf("category %q does not exist in the set\n", c)
+	}
+	if _, exists := papers.List[d]; exists == true {
+		return fmt.Errorf("category %q already exists in the set\n", d)
+	}
+	if err := os.Rename(filepath.Join(papers.Path, c), filepath.Join(papers.Path, d)); err != nil {
+		return err
+	}
+	papers.List[d] = make(map[string]*Paper)
+	for k, v := range papers.List[c] {
+		pPaperPath := filepath.Join(papers.Path, filepath.Join(d, v.PaperName+".pdf"))
+		pK := filepath.Join(d, filepath.Base(k))
+		papers.List[d][pK] = papers.List[c][k]
+		papers.List[d][pK].PaperPath = pPaperPath
+
+		if v.MetaPath != "" {
+			pMetaPath := filepath.Join(papers.Path, filepath.Join(d, v.PaperName+".meta.xml"))
+			papers.List[d][pK].MetaPath = pMetaPath
+		}
+	}
+	delete(papers.List, c)
+	return nil
+}
+
+// ProcessAddPaperInput processes user-provided input related to new paper
+// download; c is the category, p can be a URL or DOI.
+//
+// When p is a URL that serves a PDF directly, the PDF is saved as-is.
+// Otherwise a DOI is looked for on the page (and, failing that, on the page
+// returned for scihubURL+p) and the paper is retrieved by DOI. Input that is
+// not a URL is treated as a DOI.
+func (papers *Papers) ProcessAddPaperInput(c string, p string) (*Paper, error) {
+	var doi []byte
+	if u, err := url.Parse(p); err == nil && u.Scheme != "" && u.Host != "" {
+		resp, err := makeRequest(client, p)
+		if err != nil {
+			return &Paper{}, err
+		}
+		// compare the bare media type so that parameters and case do not
+		// matter (e.g. "application/pdf; charset=binary")
+		mediaType, _, _ := mime.ParseMediaType(resp.Header.Get("Content-Type"))
+		if mediaType == "application/pdf" {
+			paper, err := papers.NewPaperFromDirectLink(resp, c)
+			if err != nil {
+				return &Paper{}, err
+			}
+			return paper, nil
+		}
+		doi = getDOIFromPage(resp)
+		if doi == nil {
+			resp, err = makeRequest(client, scihubURL+p)
+			if err != nil {
+				return &Paper{}, fmt.Errorf("%q: DOI not found on page", p)
+			}
+			doi = getDOIFromPage(resp)
+		}
+		if doi == nil {
+			return &Paper{}, fmt.Errorf("%q: DOI not found on page", p)
+		}
+	} else {
+		doi = getDOIFromBytes([]byte(p))
+		if doi == nil {
+			return &Paper{}, fmt.Errorf("%q is not a valid DOI or URL\n", p)
+		}
+	}
+	paper, err := papers.NewPaperFromDOI(doi, c)
+	if err != nil {
+		return &Paper{}, err
+	}
+	return paper, nil
+}
+
+// IndexHandler renders the index of papers stored in papers.Path. Any path
+// other than "/" gets a 404, and a template that cannot be loaded results in
+// a 500 instead of a nil-pointer panic.
+func (papers *Papers) IndexHandler(w http.ResponseWriter, r *http.Request) {
+	// catch-all for paths unhandled by direct http.HandleFunc calls
+	if r.URL.Path != "/" {
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+		return
+	}
+	t, err := template.ParseFiles(filepath.Join(templateDir, "layout.html"),
+		filepath.Join(templateDir, "index.html"),
+		filepath.Join(templateDir, "list.html"),
+	)
+	if err != nil {
+		log.Println(err)
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+		return
+	}
+	res := Resp{
+		Papers: papers.List,
+	}
+	if err := t.Execute(w, &res); err != nil {
+		log.Println(err)
+	}
+}
+
+// AdminHandler renders the index of papers stored in papers.Path with
+// additional forms to modify the collection (add, delete, rename...).
+//
+// HTTP basic authentication is enforced only when both user and pass are
+// configured. A template that cannot be loaded results in a 500 instead of a
+// nil-pointer panic.
+func (papers *Papers) AdminHandler(w http.ResponseWriter, r *http.Request) {
+	if user != "" && pass != "" {
+		username, password, ok := r.BasicAuth()
+		if !ok || user != username || pass != password {
+			w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`)
+			http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+			return
+		}
+	}
+	t, err := template.ParseFiles(filepath.Join(templateDir, "admin.html"),
+		filepath.Join(templateDir, "layout.html"),
+		filepath.Join(templateDir, "list.html"),
+	)
+	if err != nil {
+		log.Println(err)
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+		return
+	}
+	res := Resp{
+		Papers: papers.List,
+	}
+	if err := t.Execute(w, &res); err != nil {
+		log.Println(err)
+	}
+}
+
+// EditHandler renders the index of papers stored in papers.Path, prefixing
+// a checkbox to each unique paper and category for modification.
+//
+// The "action" form value selects the operation: "delete" removes the checked
+// papers and categories, "move-<category>" moves the checked papers into
+// <category>, and anything else is treated as a category rename driven by the
+// "rename-category" and "rename-to" values. Processing stops at the first
+// error, which is reported through the rendered status line.
+func (papers *Papers) EditHandler(w http.ResponseWriter, r *http.Request) {
+	if user != "" && pass != "" {
+		username, password, ok := r.BasicAuth()
+		if !ok || user != username || pass != password {
+			w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`)
+			http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+			return
+		}
+	}
+	t, err := template.ParseFiles(filepath.Join(templateDir, "admin-edit.html"),
+		filepath.Join(templateDir, "layout.html"),
+		filepath.Join(templateDir, "list.html"),
+	)
+	if err != nil {
+		log.Println(err)
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+		return
+	}
+	res := Resp{
+		Papers: papers.List,
+	}
+	if err := r.ParseForm(); err != nil {
+		res.Status = err.Error()
+		t.Execute(w, &res)
+		return
+	}
+
+	action := r.FormValue("action")
+	switch {
+	case action == "delete":
+		for _, p := range r.Form["paper"] {
+			if res.Status != "" {
+				break
+			}
+			if err := papers.DeletePaper(p); err != nil {
+				res.Status = err.Error()
+			}
+		}
+		for _, c := range r.Form["category"] {
+			if res.Status != "" {
+				break
+			}
+			if err := papers.DeleteCategory(c); err != nil {
+				res.Status = err.Error()
+			}
+		}
+		if res.Status == "" {
+			res.Status = "delete successful"
+		}
+	case strings.HasPrefix(action, "move-"):
+		// require the full "move-" prefix; a bare "move" used to panic when
+		// indexing the split result
+		cDest := strings.TrimPrefix(action, "move-")
+		for _, p := range r.Form["paper"] {
+			if res.Status != "" {
+				break
+			}
+			if err := papers.MovePaper(p, cDest); err != nil {
+				res.Status = err.Error()
+			}
+		}
+		if res.Status == "" {
+			res.Status = "move successful"
+		}
+	default:
+		rc := r.FormValue("rename-category")
+		rt := r.FormValue("rename-to")
+		if rc != "" && rt != "" {
+			// ensure filesystem safety of category names
+			rc = strings.Trim(strings.Replace(rc, "..", "", -1), "/.")
+			rt = strings.Trim(strings.Replace(rt, "..", "", -1), "/.")
+
+			if err := papers.RenameCategory(rc, rt); err != nil {
+				res.Status = err.Error()
+			}
+			if res.Status == "" {
+				res.Status = "rename successful"
+			}
+		}
+	}
+	t.Execute(w, &res)
+}
+
+// AddHandler provides support for new paper processing and category addition.
+//
+// When both "dl-paper" and "dl-category" are set a paper is downloaded into
+// that category; otherwise a non-empty "new-category" creates the category,
+// including any missing parent categories. The outcome is reported through
+// the rendered status line.
+func (papers *Papers) AddHandler(w http.ResponseWriter, r *http.Request) {
+	if user != "" && pass != "" {
+		username, password, ok := r.BasicAuth()
+		if !ok || user != username || pass != password {
+			w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`)
+			http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+			return
+		}
+	}
+	t, err := template.ParseFiles(filepath.Join(templateDir, "admin.html"),
+		filepath.Join(templateDir, "layout.html"),
+		filepath.Join(templateDir, "list.html"),
+	)
+	if err != nil {
+		log.Println(err)
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+		return
+	}
+	p := r.FormValue("dl-paper")
+	c := r.FormValue("dl-category")
+	nc := r.FormValue("new-category")
+
+	// sanitize input; we use the category to build the path used to save papers
+	nc = strings.Trim(strings.Replace(nc, "..", "", -1), "/.")
+
+	addPaper := len(strings.TrimSpace(p)) > 0 && len(strings.TrimSpace(c)) > 0
+	addCategory := len(strings.TrimSpace(nc)) > 0
+	res := Resp{Papers: papers.List}
+
+	// paper download, both required fields populated
+	if addPaper {
+		if paper, err := papers.ProcessAddPaperInput(c, p); err != nil {
+			res.Status = err.Error()
+		} else {
+			if paper.Meta.Title != "" {
+				res.Status = fmt.Sprintf("%q downloaded successfully", paper.Meta.Title)
+			} else {
+				res.Status = fmt.Sprintf("%q downloaded successfully", paper.PaperName)
+			}
+			res.LastPaperDL = strings.TrimPrefix(paper.PaperPath, papers.Path+"/") // example/doe2021.pdf
+		}
+		res.LastUsedCategory = c
+	} else if addCategory {
+		if _, exists := papers.List[nc]; exists {
+			res.Status = fmt.Sprintf("category %q already exists", nc)
+		} else if err := os.MkdirAll(filepath.Join(papers.Path, nc), os.ModePerm); err != nil {
+			res.Status = fmt.Sprintf("category %q could not be created on the filesystem", nc)
+		} else {
+			// accounts for nested category addition; e.g. "foo/bar/baz" where
+			// "foo/bar" and/or "foo" do not already exist. Parents that are
+			// already known are simply left alone rather than reported as an
+			// error for a category that was in fact created.
+			for n := nc; n != "."; n = filepath.Dir(n) {
+				if _, exists := papers.List[n]; !exists {
+					papers.List[n] = make(map[string]*Paper)
+				}
+			}
+			// preselect the category the user just created
+			res.LastUsedCategory = nc
+			res.Status = fmt.Sprintf("category %q added successfully", nc)
+		}
+	}
+	t.Execute(w, &res)
+}
+
+// DownloadHandler serves saved papers up for download. The path following
+// "/download/" is the paper's key in the papers set, e.g.
+// Mathematics/example2020.pdf.
+//
+// It responds 404 when the key is unknown or the file is gone, 403 when the
+// path is a directory and 500 when the file cannot be inspected.
+func (papers *Papers) DownloadHandler(w http.ResponseWriter, r *http.Request) {
+	p := strings.TrimPrefix(r.URL.Path, "/download/")
+	c := filepath.Dir(p)
+
+	// return 404 if the provided paper category or paper key do not exist in
+	// the papers set (indexing a missing category yields a nil map, so a
+	// single lookup covers both)
+	paper, exists := papers.List[c][p]
+	if !exists {
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+		return
+	}
+
+	// ensure the paper (PaperPath) actually exists on the filesystem
+	i, err := os.Stat(paper.PaperPath)
+	switch {
+	case os.IsNotExist(err):
+		http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound)
+	case err != nil:
+		// e.g. permission denied; i is nil here and must not be used
+		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+	case i.IsDir():
+		http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden)
+	default:
+		http.ServeFile(w, r, paper.PaperPath)
+	}
+}
+
+// main configures the outbound HTTP client, parses the command-line flags,
+// indexes the papers directory, locates the HTML templates and starts the
+// HTTP server.
+func main() {
+	// some publishers have cookie + HTTP 302 checks (e.g. sagepub), let's look
+	// more like a real browser
+	options := cookiejar.Options{
+		PublicSuffixList: publicsuffix.List,
+	}
+	cookies, err := cookiejar.New(&options)
+	if err != nil {
+		panic(err)
+	}
+
+	// custom DialContext which blocks outbound requests to local addresses and
+	// interfaces (security)
+	http.DefaultTransport.(*http.Transport).DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
+		// we could run our check after a dial, but we'd have to discard
+		// connect errors to prevent exposure of local services; a preemptive
+		// lookup is the lesser of two evils, I think
+		//
+		// SplitHostPort also unwraps bracketed IPv6 literals ("[::1]:80"),
+		// which would otherwise fail the lookup and skip the check entirely
+		dialHost, dialPort, err := net.SplitHostPort(addr)
+		if err != nil {
+			return nil, err
+		}
+		ips, err := net.DefaultResolver.LookupHost(ctx, dialHost)
+		if err != nil {
+			return nil, err
+		}
+		if len(ips) == 0 {
+			return nil, errors.New("no addresses found for host")
+		}
+		for _, ip := range ips {
+			if isPrivateIP(net.ParseIP(ip)) {
+				return nil, errors.New("requests to private IPs are blocked")
+			}
+		}
+
+		// dial the addresses that were just vetted rather than the host name,
+		// so a second DNS answer cannot point the connection elsewhere
+		var dialer net.Dialer
+		var lastErr error
+		for _, ip := range ips {
+			conn, err := dialer.DialContext(ctx, network, net.JoinHostPort(ip, dialPort))
+			if err == nil {
+				return conn, nil
+			}
+			lastErr = err
+		}
+		return nil, lastErr
+	}
+	client = &http.Client{Jar: cookies}
+
+	var papers Papers
+	papers.List = make(map[string]map[string]*Paper)
+
+	flag.StringVar(&scihubURL, "sci-hub", "https://sci-hub.se/", "Sci-Hub URL")
+	flag.StringVar(&papers.Path, "path", "./papers", "Absolute or relative path to papers folder")
+	flag.StringVar(&host, "host", "127.0.0.1", "IP address to listen on")
+	flag.Uint64Var(&port, "port", 9090, "Port to listen on")
+	flag.StringVar(&user, "user", "", "Username for /admin/ endpoints (optional)")
+	flag.StringVar(&pass, "pass", "", "Password for /admin/ endpoints (optional)")
+	flag.Parse()
+
+	papers.Path, err = filepath.Abs(papers.Path)
+	if err != nil {
+		panic(err)
+	}
+
+	if _, err := os.Stat(papers.Path); os.IsNotExist(err) {
+		if err := os.Mkdir(papers.Path, os.ModePerm); err != nil {
+			panic(err)
+		}
+	}
+	if err := papers.PopulatePapers(); err != nil {
+		panic(err)
+	}
+	if net.ParseIP(host) == nil {
+		panic(errors.New("Host flag could not be parsed; is it an IP address?"))
+	}
+
+	// prefer system-installed template assets over project-local paths
+	if _, err := os.Stat(filepath.Join(buildPrefix, "/share/crane/templates")); err != nil {
+		dir, err := filepath.Abs(filepath.Dir(os.Args[0]))
+		if err != nil {
+			log.Fatal(err)
+		}
+		templateDir = filepath.Join(dir, "templates")
+	} else {
+		templateDir = filepath.Join(buildPrefix, "/share/crane/templates")
+	}
+
+	http.HandleFunc("/", papers.IndexHandler)
+	http.HandleFunc("/admin/", papers.AdminHandler)
+	http.HandleFunc("/admin/edit/", papers.EditHandler)
+	http.HandleFunc("/admin/add/", papers.AddHandler)
+	http.HandleFunc("/download/", papers.DownloadHandler)
+	fmt.Printf("Listening on %v port %v (http://%v:%v/)\n", host, port, host, port)
+	log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", host, port), nil))
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..b905287
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module crane
+
+go 1.14
+
+require golang.org/x/net v0.0.0-20210119194325-5f4716e94777
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..fb889a9
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,6 @@
+golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew=
+golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/screenshots/admin.png b/screenshots/admin.png
new file mode 100644
index 0000000..bbbb96b
--- /dev/null
+++ b/screenshots/admin.png
diff --git a/templates/admin-edit.html b/templates/admin-edit.html
new file mode 100644
index 0000000..58d93f5
--- /dev/null
+++ b/templates/admin-edit.html
@@ -0,0 +1,69 @@
+{{ template "layout.html" . }}
+{{ define "content" }}
+<p>{{ .Status }}</p>
+<table id='header'>
+  <tr>
+  {{ $categoryCount := len .Papers }}
+  {{ if gt $categoryCount 0 }}
+    <td class='inpt'>
+      <form method='post' action='/admin/edit/'>
+        <input type="text" id="rename-category" name="rename-to" placeholder="Mathematics">
+        <select class="btn" name="rename-category" id="category">
+        {{ range $category, $papers := .Papers }}
+        <option value="{{ $category }}">{{ $category }}</option>
+        {{ end }}
+        </select>
+        <input class='btn' type="submit" value="Rename Category">
+      </form>
+    </td>
+  {{ end }}
+  </tr>
+</table>
+<table class='tabs'>
+  <tr>
+    <td><a class='active' href='/admin/'>Back</a></td>
+  </tr>
+</table>
+<div class='content'>
+{{ if gt $categoryCount 0 }}
+<form method='post' action='/admin/edit/'>
+  <select class="btn" name="action" id="Action">
+    <optgroup label="Action">
+      <option value="delete">Delete</option>
+    </optgroup>
+    <optgroup label="Move To">
+      {{ range $category, $papers := .Papers }}
+      <option value="move-{{ $category }}">{{ $category }}</option>
+      {{ end }}
+    </optgroup>
+  </select>
+  <input class='btn' type="submit" value="Save" />
+<table summary='paper list' class='list nowrap'>
+{{ range $category, $papers := .Papers }}
+  {{ $paperCount := len $papers }}
+  <tr class='nohover-highlight'>
+		<td colspan='4' class='papersection'>
+			<input type="checkbox" id="{{ $category }}" name="category" value="{{ $category }}">
+			<label for="{{ $category }}">{{ $category }}</label>
+		</td>
+	</tr>
+  {{ range $path, $paper := $papers }}
+  <tr>
+    {{ if $paper.Meta.Title }}
+    <td class="sublevel-paper title"><input type="checkbox" id="{{ $path }}" name="paper" value="{{ $path }}"><label for="{{ $path }}"><a href='/download/{{ $path }}' title='{{ $paper.Meta.Title }}'>{{ $paper.Meta.Title }}</a></label></td>
+    {{ else }}
+    <td class="sublevel-paper title"><input type="checkbox" id="{{ $path }}" name="paper" value="{{ $path }}"><label for="{{ $path }}"><a href='/download/{{ $path }}' title='{{ $paper.PaperName }}'>{{ $paper.PaperName }}</a></label></td>
+    {{ end }}
+  </tr>
+  <tr>
+    {{ if $paper.Meta.Title }}
+    <td class="sub">author: {{ $contCount := len $paper.Meta.Contributors }} {{ if gt $contCount 0 }}{{ $author := index $paper.Meta.Contributors 0 }}<a href='/?a={{ $author.LastName }}'>{{ $author.LastName }}{{ if gt $contCount 1 }} et al.{{ end }}</a>{{ end }} yr: <a href="?y={{ $paper.Meta.PubYear }}">{{ $paper.Meta.PubYear }}</a> doi: <a href="https://doi.org/{{ $paper.Meta.DOI }}">{{ $paper.Meta.DOI }}</a> journal: <font color="black">{{ $paper.Meta.Journal }}</font></td>
+    {{ else }}
+    <td></td>
+    {{ end }}
+  </tr>
+  {{ end }}
+{{ end }}
+</table></form>
+{{ end }}
+</div>
+{{ end }}
diff --git a/templates/admin.html b/templates/admin.html
new file mode 100644
index 0000000..8c5222c
--- /dev/null
+++ b/templates/admin.html
@@ -0,0 +1,50 @@
+{{ template "layout.html" . }}
+{{ define "content" }}
+{{/* Admin page body, rendered into layout.html's "content" block: a status
+     line, the "New Category" form and, once .Papers has at least one
+     category, the paper download form and the "list" block. */}}
+{{/* offer a download link next to the status when .LastPaperDL is set */}}
+{{ if .LastPaperDL }}
+<p>{{ .Status }} (<a style="text-decoration: underline;" href="/download/{{ .LastPaperDL }}">download</a>)</p>
+{{ else }}
+<p>{{ .Status }}</p>
+{{ end }}
+<table id='header'>
+  <tr>
+    <td class='inpt'>
+      <form method='post' action='/admin/add/'>
+      <input type='text' name='new-category' placeholder="Mathematics" value=''/>
+      <input class='btn' type="submit" value="New Category" />
+    </form>
+		</td>
+	</tr>
+  {{/* the download form posts a target category, so it is only shown when
+       at least one category exists */}}
+  {{ $categoryCount := len .Papers }}
+  {{ if gt $categoryCount 0 }}
+	<tr>
+    <td class='inpt'>
+      <form method='post' action='/admin/add/'>
+        <input type='text' name='dl-paper' placeholder="URL or DOI" value=''/>
+        <select class="btn" name="dl-category" id="category">
+          {{/* emit the last used category first; the range below skips it
+               so it is not listed twice */}}
+          {{ $lastUsedCategory := .LastUsedCategory }}
+          {{ if $lastUsedCategory }}
+          <option value="{{ .LastUsedCategory }}">{{ $lastUsedCategory }}</option>
+          {{ end }}
+          {{ range $category, $papers := .Papers }}
+          {{ if ne $category $lastUsedCategory }}
+          <option value="{{ $category }}">{{ $category }}</option>
+          {{ end }}
+          {{ end }}
+        </select>
+        <input class='btn' type="submit" value="Download" />
+      </form>
+    </td>
+  </tr>
+  {{ end }}
+</table>
+{{/* the tab bar and paper list are likewise hidden until a category exists */}}
+{{ if gt $categoryCount 0 }}
+<table class='tabs'>
+  <tr>
+    <td><a class='active' href='/admin/edit/'>Edit</a></td>
+  </tr>
+</table>
+<div class='content'>
+{{ block "list" . }}{{ end }}
+</div>
+{{ end }}
+{{ end }}
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..7de21f7
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,16 @@
+{{ template "layout.html" . }}
+{{ define "content" }}
+{{/* Index page body, rendered into layout.html's "content" block: the
+     "list" block plus a link to the admin page, or a hint to create a
+     category when .Papers is empty. */}}
+<table id='header'>
+</table>
+{{ $categoryCount := len .Papers }}
+{{ if gt $categoryCount 0 }}
+<table class='tabs'>
+  <tr>
+    <td><a class='active' href='/admin/'>Manage</a></td>
+  </tr>
+</table>
+<div class='content'>
+{{ block "list" . }}{{ end }}
+</div>
+{{ else }}
+<p>nothing here yet, <a style="text-decoration:underline;" href="/admin/">create a category</a> to start downloading papers</p>
+{{ end }}
+{{ end }}
diff --git a/templates/layout.html b/templates/layout.html
new file mode 100644
index 0000000..26130d0
--- /dev/null
+++ b/templates/layout.html
@@ -0,0 +1,296 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8"/>
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Crane</title>
+<style>
+@media (min-width: 601px) {
+    div#crane {
+        font-size: 16px;
+    }
+}
+@media (max-width: 600px) {
+    div#crane {
+        font-size: 14px;
+    }
+}
+
+div#crane {
+  font-family: monospace;
+  padding: 0em;
+  margin: 0em;
+  background: white;
+  padding: 4px;
+}
+
+div#crane a {
+  color: blue;
+  text-decoration: none;
+}
+
+div#crane table#header input[type="text"] {
+  font-family: inherit;
+  font-size: 100%;
+  display: block;
+  width: calc(100% - 1rem);
+  border: 1px solid #888;
+  border-color: #888;
+  padding: .375rem;
+  background-color: #fff;
+  background-clip: padding-box;
+  margin-bottom: .5em;
+  line-height: 1.5;
+  transition: border-color .15s ease-in-out,box-shadow .15s ease-in-out;
+}
+
+div#crane table#header .btn {
+  display: inline-block;
+  margin-bottom: 1rem;
+}
+
+div#crane .btn {
+  font-family: inherit;
+  font-size: 85%;
+  color: black;
+  text-overflow: unset;
+  display: inline;
+  background: #e9ecef;
+  border: #343a40 1px solid;
+	padding: 2px 8px;
+	border-radius: 0;
+}
+
+div#crane a:hover {
+  text-decoration: underline;
+}
+div#crane a {
+  color: #000;
+}
+
+div#crane table {
+  border-collapse: collapse;
+}
+
+div#crane table#header {
+  width: 100%;
+  margin-bottom: 1em;
+}
+
+div#crane table#header td.inpt {
+  padding-left: 10px;
+  white-space: nowrap;
+  border-collapse: collapse;
+  color: #000;
+}
+
+div#crane table#header td.main {
+  font-size: 250%;
+  padding-left: 10px;
+  white-space: nowrap;
+  border-collapse: collapse;
+}
+
+div#crane table#header td.form {
+  text-align: left;
+  vertical-align: bottom;
+  padding-bottom: 2px;
+  white-space: nowrap;
+}
+
+div#crane table#header td.form form,
+div#crane table#header td.form input,
+div#crane table#header td.form select {
+}
+
+div#crane table#header td.sub {
+  color: #777;
+  border-top: solid 1px #ccc;
+  padding-left: 10px;
+}
+
+/* tab bar: full-width strip with a bottom rule under the tab links */
+div#crane table.tabs {
+  border-bottom: solid 2px #ccc;
+  border-collapse: collapse;
+  margin-top: 1em;
+  margin-bottom: 0px;
+  width: 100%;
+}
+
+div#crane table.tabs td {
+  padding: 0px 0px 0px;
+  vertical-align: bottom;
+}
+
+div#crane table.tabs td a {
+  font-size: 90%;
+  padding: 2px 0.75em;
+  color: black;
+  background-color: #ccc;
+}
+
+/* right-align form cells placed in the tab bar */
+div#crane table.tabs td.form {
+  text-align: right;
+}
+
+div#crane table.tabs td.form form {
+  padding-bottom: 2px;
+  white-space: normal;
+  padding-left: 1em;
+}
+
+div#crane table.tabs td.form input,
+div#crane table.tabs td.form select {
+}
+
+div#crane div.content {
+  margin-top: 1em;
+  padding-bottom: 1em;
+  border-bottom: solid 2px #ccc;
+}
+
+div#crane table.list {
+  width: 100%;
+  border: none;
+  border-collapse: collapse;
+}
+
+div#crane table.list tr {
+  background: white;
+}
+
+div#crane table.list:first-child tr:first-child td:first-child {
+  padding-top: 0em;
+}
+
+div#crane table.list td.sub {
+  padding-left: 4em;
+  font-size: 80%;
+}
+
+div#crane table.list th {
+  font-weight: bold;
+  /* color: #888;
+  border-top: dashed 1px #888;
+  border-bottom: dashed 1px #888;
+  */
+  padding: 0.1em 0.5em 0.05em 0.5em;
+  vertical-align: baseline;
+}
+
+div#crane table.list td {
+  border: none;
+  padding: 0.1em 0.5em 0.1em 0.5em;
+}
+
+div#crane table.list td.title {
+  text-decoration: underline;
+  padding-left: 1.5em;
+}
+
+div#crane table.list td.authors {
+  max-width: 10em;
+}
+
+div#crane table.list td.year {
+  max-width: 5em;
+}
+
+div#crane table.list td.doi {
+  max-width: 5em;
+}
+
+div#crane table.list td a {
+  color: black;
+}
+
+div#crane table.list td a:hover {
+  color: #00f;
+}
+
+div#crane img {
+  border: none;
+}
+
+div#crane td#content {
+  vertical-align: top;
+  padding: 1em 2em 1em 1em;
+  border: none;
+}
+
+div#crane table.nowrap td {
+  white-space: normal;
+}
+
+div#crane .left {
+  text-align: left;
+}
+
+div#crane .right {
+  text-align: right;
+}
+
+div#crane table.list td.papersection {
+  font-weight: bold;
+  padding-top: 1em;
+  padding-bottom: .5em;
+  color: black;
+}
+
+div#crane table.list td.sublevel-paper {
+}
+
+div#crane div.footer {
+  margin-top: 0.5em;
+  text-align: center;
+  font-size: 80%;
+  color: #ccc;
+}
+
+div#crane div.footer a {
+  color: #ccc;
+  text-decoration: none;
+}
+
+div#crane div.footer a:hover {
+  text-decoration: underline;
+}
+
+* { line-height: 1.25em; }
+
+div#crane {
+  max-width: 55em;
+  margin: auto;
+  -moz-tab-size: 4;
+  tab-size: 4;
+}
+
+div#crane table.list th a {
+  color: inherit;
+}
+
+div#crane table.list tr.nohover-highlight:hover:nth-child(even) {
+  background: inherit;
+}
+
+div#crane table.blob td.linenumbers a:target {
+  color: goldenrod;
+  text-decoration: underline;
+  outline: none;
+}
+
+</style>
+</head>
+<body>
+<center>
+<div id='crane'>
+<table id='header'>
+  <tr><td class='main'><a href="/">Crane</a></td></tr>
+  <tr><td class='sub'>Research literature archival and categorization</td></tr>
+</table>
+{{ block "content" . }}{{ end }}
+<div class='footer'><a href='https://git.jordan.im/crane'>crane</a></div>
+</div>
+</center>
+</body>
+</html>
diff --git a/templates/list.html b/templates/list.html
new file mode 100644
index 0000000..718a732
--- /dev/null
+++ b/templates/list.html
@@ -0,0 +1,29 @@
+{{ define "list" }}
+{{/* Read-only paper list: one heading row per non-empty category in
+     .Papers, then two rows per paper (download link, metadata summary). */}}
+<table summary='paper list' class='list nowrap'>
+{{ range $category, $papers := .Papers }}
+  {{ $paperCount := len $papers }}
+  {{ if ge $paperCount 1 }}
+  <tr class='nohover-highlight'>
+    <td colspan='4' class='papersection'>{{ $category }}</td>
+  </tr>
+  {{ range $path, $paper := $papers }}
+  <tr>
+    {{/* fall back to the paper's file name when no title metadata exists */}}
+    {{ if $paper.Meta.Title }}
+    <td class="sublevel-paper title"><a href='/download/{{ $path }}' title='{{ $paper.Meta.Title }}'>{{ $paper.Meta.Title }}</a></td>
+    {{ else }}
+    <td class="sublevel-paper title"><a href='/download/{{ $path }}' title='{{ $paper.PaperName }}'>{{ $paper.PaperName }}</a></td>
+    {{ end }}
+  </tr>
+  <tr>
+    {{ if $paper.Meta.Title }}
+    {{/* the author link is opened and closed inside the same branch so no
+         stray closing tag is emitted for papers without contributors */}}
+    <td class="sub">author: {{ $contCount := len $paper.Meta.Contributors }} {{ if gt $contCount 0 }}{{ $author := index $paper.Meta.Contributors 0 }}<a href='/?a={{ $author.LastName }}'>{{ $author.LastName }}{{ if gt $contCount 1 }} et al.{{ end }}</a>{{ end }} yr: <a href="?y={{ $paper.Meta.PubYear }}">{{ $paper.Meta.PubYear }}</a> doi: <a href="https://doi.org/{{ $paper.Meta.DOI }}">{{ $paper.Meta.DOI }}</a> journal: <font color="black">{{ $paper.Meta.Journal }}</font></td>
+    {{ else }}
+    <td></td>
+    {{ end }}
+  </tr>
+  {{ end }}
+  {{ end }}
+{{ end }}
+</table>
+{{ end }}
diff --git a/util.go b/util.go
new file mode 100644
index 0000000..93bf0a6
--- /dev/null
+++ b/util.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"os"
+	"regexp"
+)
+
+// privateIPBlocks holds the loopback, RFC 1918, link-local and unique-local
+// networks matched by isPrivateIP. It is built once during package
+// initialization, so concurrent callers never race on (or observe a partially
+// filled) slice.
+var privateIPBlocks = func() []*net.IPNet {
+	cidrs := []string{
+		"127.0.0.0/8",    // IPv4 loopback
+		"10.0.0.0/8",     // RFC1918
+		"172.16.0.0/12",  // RFC1918
+		"192.168.0.0/16", // RFC1918
+		"169.254.0.0/16", // RFC3927 link-local
+		"::1/128",        // IPv6 loopback
+		"fe80::/10",      // IPv6 link-local
+		"fc00::/7",       // IPv6 unique local addr
+	}
+	blocks := make([]*net.IPNet, 0, len(cidrs))
+	for _, cidr := range cidrs {
+		_, block, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// the list above is fixed at compile time, so a parse failure
+			// is a programmer error
+			panic(fmt.Errorf("parse error on %q: %v", cidr, err))
+		}
+		blocks = append(blocks, block)
+	}
+	return blocks
+}()
+
+// isPrivateIP reports whether the provided IP address is a loopback,
+// link-local, RFC 1918 private or IPv6 unique-local address
+//
+// credit: https://stackoverflow.com/a/50825191
+func isPrivateIP(ip net.IP) bool {
+	if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
+		return true
+	}
+	for _, block := range privateIPBlocks {
+		if block.Contains(ip) {
+			return true
+		}
+	}
+	return false
+}
+
+// doiPattern matches a DOI: "10.", a registrant code of four or more digits,
+// a slash, and a suffix that ends at whitespace, quotes or common delimiters.
+// It is compiled once here rather than on every getDOIFromBytes call.
+var doiPattern = regexp.MustCompile(`(10[.][0-9]{4,}[^\s"/<>]*/[^\s"'<>,\{\};:\[\]\?&]+)`)
+
+// getDOIFromBytes returns the first DOI found in the provided []byte slice,
+// or nil if it contains none; the result is a sub-slice of b
+func getDOIFromBytes(b []byte) []byte {
+	return doiPattern.Find(b)
+}
+
+// makeRequest makes a GET request to a remote resource using the provided
+// *http.Client and returns its *http.Response
+//
+// An error is returned when u cannot be turned into a request, when the
+// request fails, or when the status code is not 200. On success the caller
+// owns the response and must close its Body.
+func makeRequest(client *http.Client, u string) (*http.Response, error) {
+	req, err := http.NewRequest("GET", u, nil)
+	if err != nil {
+		// u is not a usable URL; req is nil here and must not be touched
+		return nil, err
+	}
+
+	// sciencedirect and company block atypical user agents
+	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		// the response is discarded, so release its connection here
+		resp.Body.Close()
+		return nil, fmt.Errorf("%q: status code not OK", u)
+	}
+	return resp, nil
+}
+
+// getDOIFromPage returns the first DOI found in the body of the
+// *http.Response provided, or nil if none is found; the body is scanned line
+// by line and closed before returning
+func getDOIFromPage(resp *http.Response) []byte {
+	defer resp.Body.Close()
+
+	// minified pages routinely exceed bufio.Scanner's default 64 KiB line
+	// limit, which would end the scan before a later DOI is reached
+	const maxLineSize = 4 << 20
+	scanner := bufio.NewScanner(resp.Body)
+	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineSize)
+
+	for scanner.Scan() {
+		if doi := getDOIFromBytes(scanner.Bytes()); doi != nil {
+			// copy the match out of the scanner's internal buffer
+			return append([]byte(nil), doi...)
+		}
+	}
+	// a read error or an over-long line also ends the scan; either way no
+	// DOI was found
+	return nil
+}
+
+// renameFile is an alternative to os.Rename which supports moving files
+// between devices where os.Rename would return an error (cross-device link)
+//
+// os.Rename is tried first; only when it fails is src copied to dst and then
+// removed. A src equal to dst is a no-op.
+func renameFile(src string, dst string) (err error) {
+	if src == dst {
+		return nil
+	}
+	// fast path: a plain rename needs no data copy when it is possible
+	if err = os.Rename(src, dst); err == nil {
+		return nil
+	}
+	if err = copyFile(src, dst); err != nil {
+		return fmt.Errorf("failed to copy source file %s to %s: %w", src, dst, err)
+	}
+	if err = os.RemoveAll(src); err != nil {
+		return fmt.Errorf("failed to cleanup source file %s: %w", src, err)
+	}
+	return nil
+}
+
+// copyFile copies a file located at src to dst, used by renameFile()
+//
+// dst is created (or truncated), the data is synced to disk, and dst is given
+// the permission bits of src. The first error encountered is returned; an
+// error from closing dst is reported only if nothing failed before it.
+//
+// credit: https://gist.github.com/r0l1/92462b38df26839a3ca324697c8cba04
+func copyFile(src, dst string) (err error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+
+	// stat through the open handle so the mode belongs to the file being read
+	si, err := in.Stat()
+	if err != nil {
+		return err
+	}
+
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// do not let a close failure mask an earlier, more specific error
+		if e := out.Close(); e != nil && err == nil {
+			err = e
+		}
+	}()
+
+	if _, err = io.Copy(out, in); err != nil {
+		return err
+	}
+	if err = out.Sync(); err != nil {
+		return err
+	}
+	return os.Chmod(dst, si.Mode())
+}
+
+// getMetaFromDOI requests the metadata for doi from doi.org, saves the XML
+// response to a temporary file and returns that file's path
+//
+// An error is returned when the request cannot be built or fails, when the
+// status code is not 200, when the content-type is not
+// application/vnd.crossref.unixref+xml, or when the body cannot be saved. No
+// temporary file is left behind on error.
+func getMetaFromDOI(client *http.Client, doi []byte) (string, error) {
+	u := "https://doi.org/" + string(doi)
+	req, err := http.NewRequest("GET", u, nil)
+	if err != nil {
+		// doi produced an unusable URL; req is nil here
+		return "", err
+	}
+	req.Header.Add("Accept", "application/vnd.crossref.unixref+xml;q=1,application/rdf+xml;q=0.5")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("%q: status code not OK, DOI invalid?", u)
+	}
+	if resp.Header.Get("Content-Type") != "application/vnd.crossref.unixref+xml" {
+		return "", fmt.Errorf("%q: content-type not application/vnd.crossref.unixref+xml", u)
+	}
+
+	// create a temporary file to store XML stream
+	tmpXML, err := ioutil.TempFile("", "tmp-*.meta.xml")
+	if err != nil {
+		return "", err
+	}
+	// only the reserved name is needed; saveRespBody reopens the path itself
+	if err := tmpXML.Close(); err != nil {
+		os.Remove(tmpXML.Name()) // best-effort cleanup; the close error is what matters
+		return "", err
+	}
+
+	// incrementally save XML data to the temporary file; saves memory using
+	// the filesystem instead of passing around buffers
+	if err := saveRespBody(resp, tmpXML.Name()); err != nil {
+		os.Remove(tmpXML.Name()) // best-effort cleanup of the partial file
+		return "", err
+	}
+	return tmpXML.Name(), nil
+}
+
+// getPaper makes an outbound request to a remote resource and saves the
+// response body to a temporary file, returning its path, provided the response
+// has the content-type application/pdf
+//
+// An error is returned when the request cannot be built or fails, when the
+// status code is not 200, when the content-type differs, or when the body
+// cannot be saved. No temporary file is left behind on error.
+func getPaper(client *http.Client, u string) (string, error) {
+	req, err := http.NewRequest("GET", u, nil)
+	if err != nil {
+		// u is not a usable URL; req is nil here
+		return "", err
+	}
+
+	// sci-hub gives us the paper directly (no iframe) if we're on mobile
+	req.Header.Add("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_7 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Mobile/15E148 Safari/604.1")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("%q: status code not OK", u)
+	}
+	if resp.Header.Get("Content-Type") != "application/pdf" {
+		return "", fmt.Errorf("%q: content-type not application/pdf", u)
+	}
+
+	tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf")
+	if err != nil {
+		return "", err
+	}
+	// only the reserved name is needed; saveRespBody reopens the path itself
+	if err := tmpPDF.Close(); err != nil {
+		os.Remove(tmpPDF.Name()) // best-effort cleanup; the close error is what matters
+		return "", err
+	}
+
+	// write resp.Body (paper data) to tmpPDF
+	if err := saveRespBody(resp, tmpPDF.Name()); err != nil {
+		os.Remove(tmpPDF.Name()) // best-effort cleanup of the partial file
+		return "", err
+	}
+	return tmpPDF.Name(), nil
+}
+
+// saveRespBody writes the body of the provided http.Response to path,
+// creating or truncating the file
+//
+// Reading goes through http.MaxBytesReader with a limit of MAX_SIZE bytes, so
+// a larger body results in an error. A failure to close the written file is
+// reported when nothing failed before it. The response body is not closed
+// here.
+func saveRespBody(resp *http.Response, path string) (err error) {
+	out, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// a failed close can mean the data never reached the disk
+		if cerr := out.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	r := http.MaxBytesReader(nil, resp.Body, MAX_SIZE)
+	_, err = io.Copy(out, r)
+	return err
+}
author	Jordan <me@jordan.im>	2021-02-16 16:48:56 -0700
committer	Jordan <me@jordan.im>	2021-02-16 16:48:56 -0700
commit	baa10aeb413a5ac109e56db5fd7a8d62a4d5965f (patch)
tree	b063d3df79882cb454329e6469088b00d382113f
download	crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.tar.gz crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.zip