diff options
author | Jordan <me@jordan.im> | 2021-02-16 16:48:56 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2021-02-16 16:48:56 -0700 |
commit | baa10aeb413a5ac109e56db5fd7a8d62a4d5965f (patch) | |
tree | b063d3df79882cb454329e6469088b00d382113f | |
download | crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.tar.gz crane-baa10aeb413a5ac109e56db5fd7a8d62a4d5965f.zip |
initial commit
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | Makefile | 26 | ||||
-rw-r--r-- | README.md | 46 | ||||
-rw-r--r-- | crane.go | 740 | ||||
-rw-r--r-- | go.mod | 5 | ||||
-rw-r--r-- | go.sum | 6 | ||||
-rw-r--r-- | screenshots/admin.png | bin | 0 -> 172290 bytes | |||
-rw-r--r-- | templates/admin-edit.html | 69 | ||||
-rw-r--r-- | templates/admin.html | 50 | ||||
-rw-r--r-- | templates/index.html | 16 | ||||
-rw-r--r-- | templates/layout.html | 296 | ||||
-rw-r--r-- | templates/list.html | 29 | ||||
-rw-r--r-- | util.go | 237 |
13 files changed, 1524 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b4938ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.swp +*.swo +papers +crane diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4eb8879 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ +.POSIX: +.SUFFIXES: + +GO = go +RM = rm +GOFLAGS = +PREFIX = /usr/local +BINDIR = $(PREFIX)/bin +SHAREDIR = $(PREFIX)/share/crane + +goflags = $(GOFLAGS) + +all: crane + +crane: + $(GO) build $(goflags) -ldflags "-X main.buildPrefix=$(PREFIX)" + +clean: + $(RM) -f crane + +install: all + mkdir -p $(DESTDIR)$(BINDIR) + mkdir -p $(DESTDIR)$(SHAREDIR) + cp -f crane $(DESTDIR)$(BINDIR) + cp -R templates $(DESTDIR)$(SHAREDIR) + diff --git a/README.md b/README.md new file mode 100644 index 0000000..d06e6de --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +# Crane + +Crane is a minimal self-hosted research literature organizational web service +with support for paper download and metadata retrieval. + +No databases or app-proprietary formats are used. Papers are categorized by the +directories in which they're stored, and XML metadata is retrieved from the +[doi.org](https://www.doi.org/) API and written alongside each paper for which +its DOI is known. + +![admin](screenshots/admin.png) + +## Installation + +Crane can be compiled with `make` or `go build`, and installed system-wide by +running `make install` with root-level permissions. + +## Usage + +Crane can be run locally or on a server. The index (`"/"`) endpoint lists papers +but does not permit modification of the set. The admin (`"/admin/"`) endpoint +supports optional authentication and permits paper download, deletion, and +moving between categories, as well as category addition, deletion, and rename. 
+ +``` +Usage of ./crane: + -host string + IP address to listen on (default "127.0.0.1") + -port uint + Port to listen on (default 9090) + -path string + Absolute or relative path to papers folder (default "./papers") + -sci-hub string + Sci-Hub URL (default "https://sci-hub.se/") + -user string + Username for /admin/ endpoints (optional) + -pass string + Password for /admin/ endpoints (optional) +``` + +By default, crane listens on `127.0.0.1:9090` but this is configurable with the +`--host` and `--port` parameters. Authentication is optional but can be enabled +with `--user` and `--pass` parameters; the index is always publicly accessible. + +Papers are written to `--path`, stored in directories which serve as paper +categories. diff --git a/crane.go b/crane.go new file mode 100644 index 0000000..983a4b9 --- /dev/null +++ b/crane.go @@ -0,0 +1,740 @@ +package main + +import ( + "bufio" + "context" + "encoding/xml" + "errors" + "flag" + "fmt" + "html/template" + "io/ioutil" + "log" + "mime" + "net" + "net/http" + "net/http/cookiejar" + "net/url" + "os" + "path/filepath" + "strings" + + "golang.org/x/net/publicsuffix" +) + +const MAX_SIZE int64 = 50000000 // max incoming HTTP request body size (50MB) + +var ( + client *http.Client + scihubURL string + host string + port uint64 + user string + pass string + buildPrefix string + templateDir string +) + +type Contributor struct { + FirstName string `xml:"given_name"` + LastName string `xml:"surname"` + Role string `xml:"contributor_role,attr"` + Sequence string `xml:"sequence,attr"` +} + +type Meta struct { + XMLName xml.Name `xml:"doi_records"` + Journal string `xml:"doi_record>crossref>journal>journal_metadata>full_title"` + ISSN string `xml:"doi_record>crossref>journal>journal_metadata>issn"` + Title string `xml:"doi_record>crossref>journal>journal_article>titles>title"` + Contributors []Contributor `xml:"doi_record>crossref>journal>journal_article>contributors>person_name"` + PubYear string 
`xml:"doi_record>crossref>journal>journal_article>publication_date>year"` + PubMonth string `xml:"doi_record>crossref>journal>journal_article>publication_date>month"` + FirstPage string `xml:"doi_record>crossref>journal>journal_article>pages>first_page"` + LastPage string `xml:"doi_record>crossref>journal>journal_article>pages>last_page"` + DOI string `xml:"doi_record>crossref>journal>journal_article>doi_data>doi"` + Resource string `xml:"doi_record>crossref>journal>journal_article>doi_data>resource"` +} + +type Paper struct { + Meta Meta + MetaPath string + PaperName string + PaperPath string +} + +type Papers struct { + List map[string]map[string]*Paper + Path string +} + +type Resp struct { + Papers map[string]map[string]*Paper + Status string + LastPaperDL string + LastUsedCategory string +} + +// getPaperFileNameFromMeta returns the built filename (absent an extension) +// from doi.org metadata, consisting of the lowercase last name of the first +// author followed by the year of publication (e.g. doe2020) +func getPaperFileNameFromMeta(p *Meta) string { + var mainAuthor string + for _, contributor := range p.Contributors { + if contributor.Sequence == "first" { + mainAuthor = strings.Replace(contributor.LastName, "..", "", -1) + mainAuthor = strings.Replace(contributor.LastName, "/", "", -1) + break + } + } + if mainAuthor == "" || p.PubYear == "" { + return "" + } + pubYear := strings.Replace(p.PubYear, "..", "", -1) + pubYear = strings.Replace(p.PubYear, "/", "", -1) + return fmt.Sprint(strings.ToLower(mainAuthor), pubYear) +} + +// getPaperFileNameFromResp returns the name of the file present at resp taken +// first from content-disposition (if exists) then its destination URL +// following redirects; e.g. 
doe2020 +func getPaperFileNameFromResp(resp *http.Response) string { + var filename string + if disp, ok := resp.Header["Content-Disposition"]; ok { + _, params, _ := mime.ParseMediaType(disp[0]) + if f, ok := params["filename"]; ok && f != "" { + filename = f + } + } + if filename == "" { + u, _ := url.Parse(resp.Request.URL.String()) + filename = strings.TrimSuffix(filepath.Base(u.Path), "/") + } + filename = strings.TrimSuffix(filename, ".pdf") + return filename +} + +// getUniqueName ensures the provided paper name is unique, appending "-$ext" +// until a unique name is found and returned +func (papers *Papers) getUniqueName(c string, name string) string { + ext := 2 + for { + k := filepath.Join(c, name+".pdf") + if _, exists := papers.List[c][k]; exists != true { + break + } else { + name = fmt.Sprint(name, "-", ext) + ext++ + } + } + return name +} + +// findPapersWalk is a WalkFunc passed to filepath.Walk() to process papers +// stored on the filesystem +func (papers *Papers) findPapersWalk(path string, info os.FileInfo, err error) error { + // skip the papers.Path root directory + if p, _ := filepath.Abs(path); p == papers.Path { + return nil + } + + // derive category name (e.g. 
Mathematics) from directory name; used as key + var c string + if i, _ := os.Stat(path); i.IsDir() { + c = strings.TrimPrefix(path, papers.Path+"/") + } else { + c = strings.TrimPrefix(filepath.Dir(path), papers.Path+"/") + } + if _, exists := papers.List[c]; exists == false { + papers.List[c] = make(map[string]*Paper) + } + + // now that category was added, ensure file is actually a PDF + if filepath.Ext(path) != ".pdf" { + return nil + } + + var p Paper + p.PaperName = strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)) + p.PaperPath = filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".pdf")) + + // XML metadata is not required but highly recommended; PDFs aren't parsed + // so its our source only source of metadata at the moment + // + // PDF parsing looks (and probably is) fairly annoying to support and might + // be better handled by an external script + metaPath := filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".meta.xml")) + if _, err := os.Stat(metaPath); err == nil { + p.MetaPath = metaPath + + f, err := os.Open(p.MetaPath) + if err != nil { + return err + } + + // memory-efficient relative to ioutil.ReadAll() + r := bufio.NewReader(f) + d := xml.NewDecoder(r) + + // populate p struct with values derived from doi.org metadata + if err := d.Decode(&p.Meta); err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + } + + // finally add paper to papers.List set; the subkey is the paper path + // relative to papers.Path, e.g. 
Mathematics/example2020.pdf + papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p + return nil +} + +// PopulatePapers wraps filepath.Walk() and populates the papers set with +// discovered papers +func (papers *Papers) PopulatePapers() error { + if err := filepath.Walk(papers.Path, papers.findPapersWalk); err != nil { + return err + } + return nil +} + +// NewPaperFromDirectLink contains routines used to retrieve papers from remote +// endpoints provided a direct link's http.Response +func (papers *Papers) NewPaperFromDirectLink(resp *http.Response, c string) (*Paper, error) { + tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf") + if err != nil { + return &Paper{}, err + } + err = saveRespBody(resp, tmpPDF.Name()) + if err != nil { + return &Paper{}, err + } + if err := tmpPDF.Close(); err != nil { + return &Paper{}, err + } + defer os.Remove(tmpPDF.Name()) + + var p Paper + p.PaperName = papers.getUniqueName(c, getPaperFileNameFromResp(resp)) + if err != nil { + return &Paper{}, err + } + p.PaperPath = filepath.Join(papers.Path, filepath.Join(c, p.PaperName+".pdf")) + + if err := renameFile(tmpPDF.Name(), p.PaperPath); err != nil { + return nil, err + } + papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p + return &p, nil +} + +// NewPaperFromDOI contains routines used to retrieve papers from remote +// endpoints provided a DOI +func (papers *Papers) NewPaperFromDOI(doi []byte, c string) (*Paper, error) { + tmpXML, err := getMetaFromDOI(client, doi) + if err != nil { + return nil, err + } + defer os.Remove(tmpXML) + + // open temporary XML file for parsing + f, err := os.Open(tmpXML) + if err != nil { + return nil, err + } + r := bufio.NewReader(f) + d := xml.NewDecoder(r) + + // populate p struct with values derived from doi.org metadata + var p Paper + if err := d.Decode(&p.Meta); err != nil { + return nil, err + } + if err := f.Close(); err != nil { + return nil, err + } + + n := getPaperFileNameFromMeta(&p.Meta) // doe2020 + if n == "" { + // 
last-resort condition if metadata lacking author or publication year + n = strings.Replace(string(doi), "..", "", -1) + n = strings.Replace(string(doi), "/", "", -1) + } + u := papers.getUniqueName(c, n) // doe2020-(2, 3, 4...) if n already exists + + // if not matching, check if DOIs match (genuine duplicate) + if n != u { + k := filepath.Join(c, n+".pdf") + if p.Meta.DOI == papers.List[c][k].Meta.DOI { + return nil, fmt.Errorf("paper %q with DOI %q already downloaded", n, string(doi)) + } + } + + p.PaperName = u + p.PaperPath = filepath.Join(filepath.Join(papers.Path, c), p.PaperName+".pdf") + p.MetaPath = filepath.Join(filepath.Join(papers.Path, c), p.PaperName+".meta.xml") + + // parse scihubURL and join it w/ the DOI (accounts for no trailing slash) + url, _ := url.Parse(scihubURL) + url.Path = filepath.Join(url.Path, string(doi)) + + // make outbound request to sci-hub, save paper to temporary location + tmpPDF, err := getPaper(client, url.String()) + if err != nil { + return nil, err + } + defer os.Remove(tmpPDF) + + if err := renameFile(tmpPDF, p.PaperPath); err != nil { + return nil, err + } + if err := renameFile(tmpXML, p.MetaPath); err != nil { + return nil, err + } + papers.List[c][filepath.Join(c, p.PaperName+".pdf")] = &p + return &p, nil +} + +// DeletePaper deletes the provided paper and its metadata from the filesystem +// and the papers.List set +func (papers *Papers) DeletePaper(p string) error { + // check if the category in which the paper is said to belong + // exists + c := filepath.Dir(p) + if _, exists := papers.List[c]; exists != true { + return fmt.Errorf("category %q does not exist\n", papers.List[filepath.Dir(p)]) + } + + // check if paper exists in the provided category + if _, exists := papers.List[c][p]; exists != true { + return fmt.Errorf("paper %q does not exist in category %q\n", p, c) + } + + // paper and category exists and the paper belongs to the provided + // category; remove it and its XML metadata + if err := 
os.Remove(papers.List[c][p].PaperPath); err != nil { + return err + } + + // XML metadata optional; delete it if it exists + metaPath := papers.List[c][p].MetaPath + if metaPath != "" { + if _, err := os.Stat(metaPath); err == nil { + if err := os.Remove(metaPath); err != nil { + return err + } + } + } + delete(papers.List[c], p) + return nil +} + +// DeleteCategory deletes the provided category and its contents from the +// filesystem and the papers.List set +func (papers *Papers) DeleteCategory(c string) error { + if _, exists := papers.List[c]; exists != true { + return fmt.Errorf("category %q does not exist in the set\n", c) + } + if err := os.RemoveAll(filepath.Join(papers.Path, c)); err != nil { + return err + } + // remove categories which exist as subcategories of the deleted category + // from the set + for k, _ := range papers.List { + if strings.HasPrefix(k, c+"/") { + delete(papers.List, k) + } + } + delete(papers.List, c) + return nil +} + +// MovePaper moves the provided paper to the destination category on the +// filesystem and the papers.List set +func (papers *Papers) MovePaper(p string, c string) error { + cPrev := filepath.Dir(p) + if _, exists := papers.List[cPrev]; exists != true { + return fmt.Errorf("category %q does not exist\n", cPrev) + } + if _, exists := papers.List[c]; exists != true { + return fmt.Errorf("category %q does not exist\n", c) + } + if _, exists := papers.List[cPrev][p]; exists != true { + return fmt.Errorf("paper %q does not exist in category %q\n", p, cPrev) + } + if _, exists := papers.List[c][p]; exists == true { + return fmt.Errorf("paper %q exists in destination category %q\n", p, c) + } + paperDest := filepath.Join(filepath.Join(papers.Path, c), papers.List[cPrev][p].PaperName+".pdf") + if err := os.Rename(papers.List[cPrev][p].PaperPath, paperDest); err != nil { + return err + } + papers.List[c][filepath.Join(c, filepath.Base(p))] = papers.List[cPrev][p] + papers.List[c][filepath.Join(c, 
filepath.Base(p))].PaperPath = paperDest + + // XML metadata optional; move it if it exists + metaPath := papers.List[cPrev][p].MetaPath + if metaPath != "" { + if _, err := os.Stat(metaPath); err == nil { + metaName := papers.List[cPrev][p].PaperName + ".meta.xml" + metaDest := filepath.Join(filepath.Join(papers.Path, c), metaName) + if err := os.Rename(metaPath, metaDest); err != nil { + return err + } + papers.List[c][filepath.Join(c, filepath.Base(p))].MetaPath = metaDest + } + } + delete(papers.List[cPrev], p) + return nil +} + +// RenameCategory renames the provided category on the filesystem and the +// paper.List set +func (papers *Papers) RenameCategory(c string, d string) error { + if _, exists := papers.List[c]; exists != true { + return fmt.Errorf("category %q does not exist in the set\n", c) + } + if _, exists := papers.List[d]; exists == true { + return fmt.Errorf("category %q already exists in the set\n", d) + } + if err := os.Rename(filepath.Join(papers.Path, c), filepath.Join(papers.Path, d)); err != nil { + return err + } + papers.List[d] = make(map[string]*Paper) + for k, v := range papers.List[c] { + pPaperPath := filepath.Join(papers.Path, filepath.Join(d, v.PaperName+".pdf")) + pK := filepath.Join(d, filepath.Base(k)) + papers.List[d][pK] = papers.List[c][k] + papers.List[d][pK].PaperPath = pPaperPath + + if v.MetaPath != "" { + pMetaPath := filepath.Join(papers.Path, filepath.Join(d, v.PaperName+".meta.xml")) + papers.List[d][pK].MetaPath = pMetaPath + } + } + delete(papers.List, c) + return nil +} + +// ProcessAddPaperInput processes user-provided input related to new paper +// download; c is the category, p can be a URL or DOI +func (papers *Papers) ProcessAddPaperInput(c string, p string) (*Paper, error) { + var doi []byte + if u, err := url.Parse(p); err == nil && u.Scheme != "" && u.Host != "" { + resp, err := makeRequest(client, p) + if err != nil { + return &Paper{}, err + } + if resp.Header.Get("Content-Type") == "application/pdf" { + 
paper, err := papers.NewPaperFromDirectLink(resp, c) + if err != nil { + return &Paper{}, err + } + return paper, nil + } + doi = getDOIFromPage(resp) + if doi == nil { + resp, err = makeRequest(client, scihubURL+p) + if err != nil { + return &Paper{}, fmt.Errorf("%q: DOI not found on page", p) + } + doi = getDOIFromPage(resp) + } + if doi == nil { + return &Paper{}, fmt.Errorf("%q: DOI not found on page", p) + } + } else { + doi = getDOIFromBytes([]byte(p)) + if doi == nil { + return &Paper{}, fmt.Errorf("%q is not a valid DOI or URL\n", p) + } + } + paper, err := papers.NewPaperFromDOI(doi, c) + if err != nil { + return &Paper{}, err + } + return paper, nil +} + +// IndexHandler renders the index of papers stored in papers.Path +func (papers *Papers) IndexHandler(w http.ResponseWriter, r *http.Request) { + // catch-all for paths unhandled by direct http.HandleFunc calls + if r.URL.Path != "/" { + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + t, _ := template.ParseFiles(filepath.Join(templateDir, "layout.html"), + filepath.Join(templateDir, "index.html"), + filepath.Join(templateDir, "list.html"), + ) + res := Resp{ + Papers: papers.List, + } + t.Execute(w, &res) +} + +// AdminHandler renders the index of papers stored in papers.Path with +// additional forms to modify the collection (add, delete, rename...) 
+func (papers *Papers) AdminHandler(w http.ResponseWriter, r *http.Request) { + t, _ := template.ParseFiles(filepath.Join(templateDir, "admin.html"), + filepath.Join(templateDir, "layout.html"), + filepath.Join(templateDir, "list.html"), + ) + res := Resp{ + Papers: papers.List, + } + if user != "" && pass != "" { + username, password, ok := r.BasicAuth() + if ok && user == username && pass == password { + t.Execute(w, &res) + } else { + w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`) + http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized) + } + } else { + t.Execute(w, &res) + } +} + +// EditHandler renders the index of papers stored in papers.Path, prefixing +// a checkbox to each unique paper and category for modification +func (papers *Papers) EditHandler(w http.ResponseWriter, r *http.Request) { + t, _ := template.ParseFiles(filepath.Join(templateDir, "admin-edit.html"), + filepath.Join(templateDir, "layout.html"), + filepath.Join(templateDir, "list.html"), + ) + res := Resp{ + Papers: papers.List, + } + if user != "" && pass != "" { + username, password, ok := r.BasicAuth() + if !ok || user != username || pass != password { + w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`) + http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized) + return + } + } + if err := r.ParseForm(); err != nil { + res.Status = err.Error() + t.Execute(w, &res) + return + } + + if action := r.FormValue("action"); action == "delete" { + for _, p := range r.Form["paper"] { + if res.Status != "" { + break + } + if err := papers.DeletePaper(p); err != nil { + res.Status = err.Error() + } + } + for _, c := range r.Form["category"] { + if res.Status != "" { + break + } + if err := papers.DeleteCategory(c); err != nil { + res.Status = err.Error() + } + } + if res.Status == "" { + res.Status = "delete successful" + } + } else if strings.HasPrefix(action, "move") { + cDest := strings.SplitN(action, 
"move-", 2)[1] + for _, p := range r.Form["paper"] { + if res.Status != "" { + break + } + if err := papers.MovePaper(p, cDest); err != nil { + res.Status = err.Error() + } + } + if res.Status == "" { + res.Status = "move successful" + } + } else { + rc := r.FormValue("rename-category") + rt := r.FormValue("rename-to") + if rc != "" && rt != "" { + // ensure filesystem safety of category names + rc = strings.Trim(strings.Replace(rc, "..", "", -1), "/.") + rt = strings.Trim(strings.Replace(rt, "..", "", -1), "/.") + + if err := papers.RenameCategory(rc, rt); err != nil { + res.Status = err.Error() + } + if res.Status == "" { + res.Status = "rename successful" + } + } + } + t.Execute(w, &res) +} + +// AddHandler provides support for new paper processing and category addition +func (papers *Papers) AddHandler(w http.ResponseWriter, r *http.Request) { + t, _ := template.ParseFiles(filepath.Join(templateDir, "admin.html"), + filepath.Join(templateDir, "layout.html"), + filepath.Join(templateDir, "list.html"), + ) + if user != "" && pass != "" { + username, password, ok := r.BasicAuth() + if !ok || user != username || pass != password { + w.Header().Add("WWW-Authenticate", `Basic realm="Please authenticate"`) + http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized) + return + } + } + p := r.FormValue("dl-paper") + c := r.FormValue("dl-category") + nc := r.FormValue("new-category") + + // sanitize input; we use the category to build the path used to save papers + nc = strings.Trim(strings.Replace(nc, "..", "", -1), "/.") + + addPaper := len(strings.TrimSpace(p)) > 0 && len(strings.TrimSpace(c)) > 0 + addCategory := len(strings.TrimSpace(nc)) > 0 + res := Resp{Papers: papers.List} + + // paper download, both required fields populated + if addPaper { + if paper, err := papers.ProcessAddPaperInput(c, p); err != nil { + res.Status = err.Error() + } else { + if paper.Meta.Title != "" { + res.Status = fmt.Sprintf("%q downloaded successfully", 
paper.Meta.Title) + } else { + res.Status = fmt.Sprintf("%q downloaded successfully", paper.PaperName) + } + res.LastPaperDL = strings.TrimPrefix(paper.PaperPath, papers.Path+"/") // example/doe2021.pdf + } + res.LastUsedCategory = c + } else if addCategory { + // accounts for nested category addition; e.g. "foo/bar/baz" where + // "foo/bar" and/or "foo" do not already exist + n := nc + for n != "." { + _, exists := papers.List[n] + if exists == true { + res.Status = fmt.Sprintf("category %q already exists", n) + } else if err := os.MkdirAll(filepath.Join(papers.Path, n), os.ModePerm); err != nil { + res.Status = fmt.Sprintf("category %q could not be created on the filesystem", n) + } else { + papers.List[n] = make(map[string]*Paper) + } + if res.Status != "" { + break + } + res.LastUsedCategory = n + n = filepath.Dir(n) + } + if res.Status == "" { + res.Status = fmt.Sprintf("category %q added successfully", nc) + } + } + t.Execute(w, &res) +} + +// DownloadHandler serves saved papers up for download +func (papers *Papers) DownloadHandler(w http.ResponseWriter, r *http.Request) { + p := strings.TrimPrefix(r.URL.Path, "/download/") + c := filepath.Dir(p) + + // return 404 if the provided paper category or paper key do not exist in + // the papers set + if _, exists := papers.List[c]; exists == false { + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + if _, exists := papers.List[c][p]; exists == false { + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + return + } + + // ensure the paper (PaperPath) actually exists on the filesystem + i, err := os.Stat(papers.List[c][p].PaperPath) + if os.IsNotExist(err) { + http.Error(w, http.StatusText(http.StatusNotFound), http.StatusNotFound) + } else if i.IsDir() { + http.Error(w, http.StatusText(http.StatusForbidden), http.StatusForbidden) + } else { + http.ServeFile(w, r, papers.List[c][p].PaperPath) + } +} + +func main() { + // some publishers have cookie + 
HTTP 302 checks (e.g. sagepub), let's look + // more like a real browser + options := cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + } + cookies, err := cookiejar.New(&options) + if err != nil { + panic(err) + } + + // custom DialContext which blocks outbound requests to local addresses and + // interfaces (security) + http.DefaultTransport.(*http.Transport).DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) { + // we could run our check after a dial, but we'd have to discard + // connect errors to prevent exposure of local services; a preemptive + // lookup is the lesser of two evils, I think + hosts, _ := net.LookupHost(addr[:strings.LastIndex(addr, ":")]) + for _, host := range hosts { + if isPrivateIP(net.ParseIP(host)) { + return nil, errors.New("requests to private IPs are blocked") + } + } + conn, err := net.Dial(network, addr) + if err != nil { + return nil, err + } + return conn, err + } + client = &http.Client{Jar: cookies} + + var papers Papers + papers.List = make(map[string]map[string]*Paper) + + flag.StringVar(&scihubURL, "sci-hub", "https://sci-hub.se/", "Sci-Hub URL") + flag.StringVar(&papers.Path, "path", "./papers", "Absolute or relative path to papers folder") + flag.StringVar(&host, "host", "127.0.0.1", "IP address to listen on") + flag.Uint64Var(&port, "port", 9090, "Port to listen on") + flag.StringVar(&user, "user", "", "Username for /admin/ endpoints (optional)") + flag.StringVar(&pass, "pass", "", "Password for /admin/ endpoints (optional)") + flag.Parse() + + papers.Path, _ = filepath.Abs(papers.Path) + + if _, err := os.Stat(papers.Path); os.IsNotExist(err) { + os.Mkdir(papers.Path, os.ModePerm) + } + if err := papers.PopulatePapers(); err != nil { + panic(err) + } + if net.ParseIP(host) == nil { + panic(errors.New("Host flag could not be parsed; is it an IP address?")) + } + + // prefer system-installed template assets over project-local paths + if _, err := os.Stat(filepath.Join(buildPrefix, 
"/share/crane/templates")); err != nil { + dir, err := filepath.Abs(filepath.Dir(os.Args[0])) + if err != nil { + log.Fatal(err) + } + templateDir = filepath.Join(dir, "templates") + } else { + templateDir = filepath.Join(buildPrefix, "/share/crane/templates") + } + + http.HandleFunc("/", papers.IndexHandler) + http.HandleFunc("/admin/", papers.AdminHandler) + http.HandleFunc("/admin/edit/", papers.EditHandler) + http.HandleFunc("/admin/add/", papers.AddHandler) + http.HandleFunc("/download/", papers.DownloadHandler) + fmt.Printf("Listening on %v port %v (http://%v:%v/)\n", host, port, host, port) + log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", host, port), nil)) +} @@ -0,0 +1,5 @@ +module crane + +go 1.14 + +require golang.org/x/net v0.0.0-20210119194325-5f4716e94777 @@ -0,0 +1,6 @@ +golang.org/x/net v0.0.0-20210119194325-5f4716e94777 h1:003p0dJM77cxMSyCPFphvZf/Y5/NXf5fzg6ufd1/Oew= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/screenshots/admin.png b/screenshots/admin.png Binary files differnew file mode 100644 index 0000000..bbbb96b --- /dev/null +++ b/screenshots/admin.png diff --git a/templates/admin-edit.html b/templates/admin-edit.html new file mode 100644 index 0000000..58d93f5 --- /dev/null +++ b/templates/admin-edit.html @@ -0,0 +1,69 @@ +{{ template "layout.html" . 
}} +{{ define "content" }} +<p>{{ .Status }}</p> +<table id='header'> + <tr> + {{ $categoryCount := len .Papers }} + {{ if gt $categoryCount 0 }} + <td class='inpt'> + <form method='post' action='/admin/edit/'> + <input type="text" id="rename-category" name="rename-to" placeholder="Mathematics"> + <select class="btn" name="rename-category" id="category"> + {{ range $category, $papers := .Papers }} + <option value="{{ $category }}">{{ $category }}</option> + {{ end }} + </select> + <input class='btn' type="submit" value="Rename Category"> + </form> + </td> + {{ end }} + </tr> +</table> +<table class='tabs'> + <tr> + <td><a class='active' href='/admin/'>Back</a></td> + </tr> +</table> +<div class='content'> +{{ if gt $categoryCount 0 }} +<form method='post' action='/admin/edit/'> + <select class="btn" name="action" id="Action"> + <optgroup label="Action"> + <option value="delete">Delete</option> + </optgroup> + <optgroup label="Move To"> + {{ range $category, $papers := .Papers }} + <option value="move-{{ $category }}">{{ $category }}</option> + {{ end }} + </optgroup> + </select> + <input class='btn' type="submit" value="Save" /> +<table summary='paper list' class='list nowrap'> +{{ range $category, $papers := .Papers }} + {{ $paperCount := len $papers }} + <tr class='nohover-highlight'> + <td colspan='4' class='papersection'> + <input type="checkbox" id="{{ $category }}" name="category" value="{{ $category }}"> + <label for="{{ $category }}">{{ $category }}</label> + </td> + </tr> + {{ range $path, $paper := $papers }} + <tr> + {{ if $paper.Meta.Title }} + <td class="sublevel-paper title"><input type="checkbox" id="{{ $path }}" name="paper" value="{{ $path }}"><label for="{{ $path }}"><a href='/download/{{ $path }}' title='{{ $paper.Meta.Title }}'>{{ $paper.Meta.Title }}</a></label></td> + {{ else }} + <td class="sublevel-paper title"><input type="checkbox" id="{{ $path }}" name="paper" value="{{ $path }}"><label for="{{ $path }}"><a href='/download/{{ $path }}' 
title='{{ $paper.PaperName }}'>{{ $paper.PaperName }}</a></label></td> + {{ end }} + </tr> + <tr> + {{ if $paper.Meta.Title }} + <td class="sub">author: {{ $contCount := len $paper.Meta.Contributors }} {{ if gt $contCount 0 }}{{ $author := index $paper.Meta.Contributors 0 }}<a href='/?a={{ $author.LastName }}'>{{ $author.LastName }}{{ if gt $contCount 1 }} et al.{{ end }}{{ end }}</a> yr: <a href="?y={{ $paper.Meta.PubYear }}">{{ $paper.Meta.PubYear }}</a> doi: <a href="https://doi.org/{{ $paper.Meta.DOI }}">{{ $paper.Meta.DOI }}</a> journal: <font color="black">{{ $paper.Meta.Journal }}</font></td> + {{ else }} + <td></td> + {{ end }} + </tr> + {{ end }} +{{ end }} +{{ end }} +</table></form></div> +{{ end }} diff --git a/templates/admin.html b/templates/admin.html new file mode 100644 index 0000000..8c5222c --- /dev/null +++ b/templates/admin.html @@ -0,0 +1,50 @@ +{{ template "layout.html" . }} +{{ define "content" }} +{{ if .LastPaperDL }} +<p>{{ .Status }} (<a style="text-decoration: underline;" href="/download/{{ .LastPaperDL }}">download</a>)</p> +{{ else }} +<p>{{ .Status }}</p> +{{ end }} +<table id='header'> + <tr> + <td class='inpt'> + <form method='post' action='/admin/add/'> + <input type='text' name='new-category' placeholder="Mathematics" value=''/> + <input class='btn' type="submit" value="New Category" /> + </form> + </td> + </tr> + {{ $categoryCount := len .Papers }} + {{ if gt $categoryCount 0 }} + <tr> + <td class='inpt'> + <form method='post' action='/admin/add/'> + <input type='text' name='dl-paper' placeholder="URL or DOI" value=''/> + <select class="btn" name="dl-category" id="category"> + {{ $lastUsedCategory := .LastUsedCategory }} + {{ if $lastUsedCategory }} + <option value="{{ .LastUsedCategory }}">{{ $lastUsedCategory }}</option> + {{ end }} + {{ range $category, $papers := .Papers }} + {{ if ne $category $lastUsedCategory }} + <option value="{{ $category }}">{{ $category }}</option> + {{ end }} + {{ end }} + </select> + <input 
class='btn' type="submit" value="Download" /> + </form> + </td> + </tr> + {{ end }} +</table> +{{ if gt $categoryCount 0 }} +<table class='tabs'> + <tr> + <td><a class='active' href='/admin/edit/'>Edit</a></td> + </tr> +</table> +<div class='content'> +{{ block "list" . }}{{ end }} +</div> +{{ end }} +{{ end }} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..7de21f7 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,16 @@ +{{ template "layout.html" . }} +{{ define "content" }} +<table id='header'> +</table> +{{ $categoryCount := len .Papers }} +{{ if gt $categoryCount 0 }} +<table class='tabs'> + <td><a class='active' href='/admin/'>Manage</a></td> +</table> +<div class='content'> +{{ block "list" . }}{{ end }} +</div> +{{ else }} +<p>nothing here yet, <a style="text-decoration:underline;" href="/admin/">create a category</a> to start downloading papers</p> +{{ end }} +{{ end }} diff --git a/templates/layout.html b/templates/layout.html new file mode 100644 index 0000000..26130d0 --- /dev/null +++ b/templates/layout.html @@ -0,0 +1,296 @@ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8"/> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<title>Crane</title> +<style> +@media (min-width: 601px) { + div#crane { + font-size: 16px; + } +} +@media (max-width: 600px) { + div#crane { + font-size: 14px; + } +} + +div#crane { + font-family: monospace; + padding: 0em; + margin: 0em; + background: white; + padding: 4px; +} + +div#crane a { + color: blue; + text-decoration: none; +} + +div#crane table#header input[type="text"] { + font-family: inherit; + font-size: 100%; + display: block; + width: calc(100% - 1rem); + border: 1px solid #888; + border-color: #888; + padding: .375rem; + background-color: #fff; + background-clip: padding-box; + margin-bottom: .5em; + line-height: 1.5; + transition: border-color .15s ease-in-out,box-shadow .15s ease-in-out; +} + +div#crane table#header .btn { + display: 
inline-block; + margin-bottom: 1rem; +} + +div#crane .btn { + font-family: inherit; + font-size: 85%; + color: black; + text-overflow: unset; + display: inline; + background: #e9ecef; + border: #343a40 1px solid; + padding: 2px 8px; + border-radius: 0; +} + +div#crane a:hover { + text-decoration: underline; +} +div#crane a { + color: #000; +} + +div#crane table { + border-collapse: collapse; +} + +div#crane table#header { + width: 100%; + margin-bottom: 1em; +} + +div#crane table#header td.inpt { + padding-left: 10px; + white-space: nowrap; + border-collapse: collapse; + color: #000; +} + +div#crane table#header td.main { + font-size: 250%; + padding-left: 10px; + white-space: nowrap; + border-collapse: collapse; +} + +div#crane table#header td.form { + text-align: left; + vertical-align: bottom; + padding-bottom: 2px; + white-space: nowrap; +} + +div#crane table#header td.form form, +div#crane table#header td.form input, +div#crane table#header td.form select { +} + +div#crane table#header td.sub { + color: #777; + border-top: solid 1px #ccc; + padding-left: 10px; +} + +div#crane table.tabs { + border-bottom: solid 2px #ccc; + border-collapse: nowrap; + margin-top: 1em; + margin-bottom: 0px; + width: 100%; +} + +div#crane table.tabs td { + padding: 0px 0px 0px; + vertical-align: bottom; +} + +div#crane table.tabs td a { + font-size: 90%; + padding: 2px 0.75em; + color: black; + background-color: #ccc; +} + +div#crane table.tabls td.form { + text-align: right; +} + +div#crane table.tabs td.form form { + padding-bottom: 2px; + white-space: normal; + padding-left: 1em; +} + +div#crane table.tabs td.form input, +div#crane table.tabs td.form select { +} + +div#crane div.content { + margin-top: 1em; + padding-bottom: 1em; + border-bottom: solid 2px #ccc; +} + +div#crane table.list { + width: 100%; + border: none; + border-collapse: collapse; +} + +div#crane table.list tr { + background: white; +} + +div#crane table.list:first-child tr:first-child td:first-child { + 
padding-top: 0em; +} + +div#crane table.list td.sub { + padding-left: 4em; + font-size: 80%; +} + +div#crane table.list th { + font-weight: bold; + /* color: #888; + border-top: dashed 1px #888; + border-bottom: dashed 1px #888; + */ + padding: 0.1em 0.5em 0.05em 0.5em; + vertical-align: baseline; +} + +div#crane table.list td { + border: none; + padding: 0.1em 0.5em 0.1em 0.5em; +} + +div#crane table.list td.title { + text-decoration: underline; + padding-left: 1.5em; +} + +div#crane table.list td.authors { + max-width: 10em; +} + +div#crane table.list td.year { + max-width: 5em; +} + +div#crane table.list td.doi { + max-width: 5em; +} + +div#crane table.list td a { + color: black; +} + +div#crane table.list td a:hover { + color: #00f; +} + +div#crane img { + border: none; +} + +div#crane td#content { + vertical-align: top; + padding: 1em 2em 1em 1em; + border: none; +} + +div#crane table.nowrap td { + white-space: normal; +} + +div#crane .left { + text-align: left; +} + +div#crane .right { + text-align: right; +} + +div#crane table.list td.papersection { + font-weight: bold; + padding-top: 1em; + padding-bottom: .5em; + color: black; +} + +div#crane table.list td.sublevel-paper { +} + +div#crane div.footer { + margin-top: 0.5em; + text-align: center; + font-size: 80%; + color: #ccc; +} + +div#crane div.footer a { + color: #ccc; + text-decoration: none; +} + +div#crane div.footer a:hover { + text-decoration: underline; +} + +* { line-height: 1.25em; } + +div#crane { + max-width: 55em; + margin: auto; + -moz-tab-size: 4; + tab-size: 4; +} + +div#crane table.list th a { + color: inherit; +} + +div#crane table.list tr.nohover-highlight:hover:nth-child(even) { + background: inherit; +} + +div#crane table.blob td.linenumbers a:target { + color: goldenrod; + text-decoration: underline; + outline: none; +} + +</style> +</head> +<body> +<center> +<div id='crane'> +<table id='header'> + <tr><td class='main'><a href="/">Crane</a></td></tr> + <tr><td class='sub'>Research 
literature archival and categorization</td></tr> +</table> +{{ block "content" . }}{{ end }} +<div class='footer'><a href='https://git.jordan.im/crane'>crane</a></div> +</div> +</center> +</body> +</html> diff --git a/templates/list.html b/templates/list.html new file mode 100644 index 0000000..718a732 --- /dev/null +++ b/templates/list.html @@ -0,0 +1,29 @@ +{{ define "list" }} +<table summary='paper list' class='list nowrap'> +{{ range $category, $papers := .Papers }} + {{ $paperCount := len $papers }} + {{ if ge $paperCount 1 }} + <tr class='nohover-highlight'> + <td colspan='4' class='papersection'>{{ $category }}</td> + </tr> + {{ range $path, $paper := $papers }} + <tr> + {{ if $paper.Meta.Title }} + <td class="sublevel-paper title"><a href='/download/{{ $path }}' title='{{ $paper.Meta.Title }}'>{{ $paper.Meta.Title }}</a></td> + {{ else }} + <td class="sublevel-paper title"><a href='/download/{{ $path }}' title='{{ $paper.PaperName }}'>{{ $paper.PaperName }}</a></td> + {{ end }} + </tr> + <tr> + {{ if $paper.Meta.Title }} + <td class="sub">author: {{ $contCount := len $paper.Meta.Contributors }} {{ if gt $contCount 0 }}{{ $author := index $paper.Meta.Contributors 0 }}<a href='/?a={{ $author.LastName }}'>{{ $author.LastName }}{{ if gt $contCount 1 }} et al.{{ end }}{{ end }}</a> yr: <a href="?y={{ $paper.Meta.PubYear }}">{{ $paper.Meta.PubYear }}</a> doi: <a href="https://doi.org/{{ $paper.Meta.DOI }}">{{ $paper.Meta.DOI }}</a> journal: <font color="black">{{ $paper.Meta.Journal }}</font></td> + {{ else }} + <td></td> + {{ end }} + </tr> + {{ end }} + {{ else }} + {{ end }} +{{ end }} +</table> +{{ end }} @@ -0,0 +1,237 @@ +package main + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "os" + "regexp" +) + +var privateIPBlocks []*net.IPNet + +// isPrivateIP checks to if the provided IP address is a loopback, link-local +// or unique-local address +// +// credit: https://stackoverflow.com/a/50825191 +func isPrivateIP(ip net.IP) bool { + 
if privateIPBlocks == nil { + for _, cidr := range []string{ + "127.0.0.0/8", // IPv4 loopback + "10.0.0.0/8", // RFC1918 + "172.16.0.0/12", // RFC1918 + "192.168.0.0/16", // RFC1918 + "169.254.0.0/16", // RFC3927 link-local + "::1/128", // IPv6 loopback + "fe80::/10", // IPv6 link-local + "fc00::/7", // IPv6 unique local addr + } { + _, block, err := net.ParseCIDR(cidr) + if err != nil { + panic(fmt.Errorf("parse error on %q: %v", cidr, err)) + } + privateIPBlocks = append(privateIPBlocks, block) + } + } + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + for _, block := range privateIPBlocks { + if block.Contains(ip) { + return true + } + } + return false +} + +// getDOIFromBytes returns the DOI parsed from the provided []byte slice +func getDOIFromBytes(b []byte) []byte { + re := regexp.MustCompile(`(10[.][0-9]{4,}[^\s"/<>]*/[^\s"'<>,\{\};:\[\]\?&]+)`) + return re.Find(b) +} + +// makeRequest makes a request to a remote resource using the provided +// *http.Client and returns its *http.Response +func makeRequest(client *http.Client, u string) (*http.Response, error) { + req, err := http.NewRequest("GET", u, nil) + + // sciencedirect and company block atypical user agents + req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0") + + resp, err := client.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("%q: status code not OK", u) + } + return resp, nil +} + +// getDOIFromPage returns the parsed DOI from the body of the *http.Response +// provided +func getDOIFromPage(resp *http.Response) []byte { + defer resp.Body.Close() + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + doi := getDOIFromBytes(scanner.Bytes()) + if doi != nil { + return doi + } + if err := scanner.Err(); err != nil { + return nil + } + } + return nil +} + +// renameFile is an alternative to os.Rename which supports moving files 
+// between devices where os.Rename would return an error (cross-device link) +func renameFile(src string, dst string) (err error) { + if src == dst { + return nil + } + err = copyFile(src, dst) + if err != nil { + return fmt.Errorf("failed to copy source file %s to %s: %s", src, dst, err) + } + err = os.RemoveAll(src) + if err != nil { + return fmt.Errorf("failed to cleanup source file %s: %s", src, err) + } + return nil +} + +// copyFile copies a file located at src to dst, used by renameFile() +// +// credit: https://gist.github.com/r0l1/92462b38df26839a3ca324697c8cba04 +func copyFile(src, dst string) (err error) { + in, err := os.Open(src) + if err != nil { + return + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return + } + defer func() { + if e := out.Close(); e != nil { + err = e + } + }() + + _, err = io.Copy(out, in) + if err != nil { + return + } + + err = out.Sync() + if err != nil { + return + } + + si, err := os.Stat(src) + if err != nil { + return + } + err = os.Chmod(dst, si.Mode()) + if err != nil { + return + } + + return +} + +// getMetaFromDOI saves doi.org API data to TempFile and returns its path +func getMetaFromDOI(client *http.Client, doi []byte) (string, error) { + u := "https://doi.org/" + string(doi) + req, err := http.NewRequest("GET", u, nil) + + req.Header.Add("Accept", "application/vnd.crossref.unixref+xml;q=1,application/rdf+xml;q=0.5") + resp, err := client.Do(req) + if err != nil { + return "", err + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("%q: status code not OK, DOI invalid?", u) + } + if resp.Header.Get("Content-Type") != "application/vnd.crossref.unixref+xml" { + return "", fmt.Errorf("%q: content-type not application/vnd.crossref.unixref+xml", u) + } + if err != nil { + return "", err + } + + // create a temporary file to store XML stream + tmpXML, err := ioutil.TempFile("", "tmp-*.meta.xml") + if err != nil { + return "", err + } + + // incrementally save XML data to the 
temporary file; saves memory using + // the filesystem instead of passing around buffers + if err := saveRespBody(resp, tmpXML.Name()); err != nil { + return "", err + } + if err := tmpXML.Close(); err != nil { + return "", err + } + return tmpXML.Name(), nil +} + +// getPaper saves makes an outbound request to a remote resource and saves the +// response body to a temporary file, returning its path, provided the response +// has the content-type application/pdf +func getPaper(client *http.Client, u string) (string, error) { + req, err := http.NewRequest("GET", u, nil) + + // sci-hub gives us the paper directly (no iframe) if we're on mobile + req.Header.Add("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_7 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Mobile/15E148 Safari/604.1") + + resp, err := client.Do(req) + if err != nil { + return "", err + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("%q: status code not OK", u) + } + if resp.Header.Get("Content-Type") != "application/pdf" { + return "", fmt.Errorf("%q: content-type not application/pdf", u) + } + tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf") + if err != nil { + return "", err + } + + // write resp.Body (paper data) to tmpPDF + if err := saveRespBody(resp, tmpPDF.Name()); err != nil { + return "", err + } + if err := tmpPDF.Close(); err != nil { + return "", err + } + return tmpPDF.Name(), nil +} + +// saveRespBody writes the provided http.Response to path +func saveRespBody(resp *http.Response, path string) error { + out, err := os.Create(path) + if err != nil { + return err + } + defer out.Close() + + r := http.MaxBytesReader(nil, resp.Body, MAX_SIZE) + _, err = io.Copy(out, r) + if err != nil { + return err + } + return nil +} |