aboutsummaryrefslogtreecommitdiff
path: root/util.go
diff options
context:
space:
mode:
Diffstat (limited to 'util.go')
-rw-r--r--util.go237
1 files changed, 237 insertions, 0 deletions
diff --git a/util.go b/util.go
new file mode 100644
index 0000000..93bf0a6
--- /dev/null
+++ b/util.go
@@ -0,0 +1,237 @@
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "os"
+ "regexp"
+)
+
// privateIPBlocks lists the loopback, RFC 1918 private, link-local and IPv6
// unique-local networks consulted by isPrivateIP.
//
// The list is built once during package initialisation rather than lazily on
// the first isPrivateIP call, so concurrent callers can never observe a nil
// or partially populated slice.
var privateIPBlocks = func() []*net.IPNet {
	cidrs := []string{
		"127.0.0.0/8",    // IPv4 loopback
		"10.0.0.0/8",     // RFC1918
		"172.16.0.0/12",  // RFC1918
		"192.168.0.0/16", // RFC1918
		"169.254.0.0/16", // RFC3927 link-local
		"::1/128",        // IPv6 loopback
		"fe80::/10",      // IPv6 link-local
		"fc00::/7",       // IPv6 unique local addr
	}
	blocks := make([]*net.IPNet, 0, len(cidrs))
	for _, cidr := range cidrs {
		_, block, err := net.ParseCIDR(cidr)
		if err != nil {
			// The list above is fixed, so a parse failure is a programmer
			// error and not something a caller could recover from.
			panic(fmt.Errorf("parse error on %q: %v", cidr, err))
		}
		blocks = append(blocks, block)
	}
	return blocks
}()

// isPrivateIP reports whether ip is a loopback, link-local (unicast or
// multicast), RFC 1918 private or IPv6 unique-local address.
//
// credit: https://stackoverflow.com/a/50825191
func isPrivateIP(ip net.IP) bool {
	if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
		return true
	}
	for _, block := range privateIPBlocks {
		if block.Contains(ip) {
			return true
		}
	}
	return false
}
+
// doiRegexp matches a DOI-shaped token: "10." followed by at least four
// digits, optional further prefix characters, a "/" and then a suffix that
// runs until whitespace or one of the excluded delimiter characters.
//
// It is compiled once at package scope so that getDOIFromBytes does not pay
// for a regexp compilation on every call.
var doiRegexp = regexp.MustCompile(`(10[.][0-9]{4,}[^\s"/<>]*/[^\s"'<>,\{\};:\[\]\?&]+)`)

// getDOIFromBytes returns the first DOI found in b, or nil when b contains
// none. The result is a sub-slice of b, not a copy.
func getDOIFromBytes(b []byte) []byte {
	return doiRegexp.Find(b)
}
+
// makeRequest issues a GET request for u using the provided *http.Client and
// returns the *http.Response when the server answers with 200 OK.
//
// On success the caller owns the response and must close its Body. An error
// is returned when the request cannot be built, when the client fails, or
// when the status code is anything other than 200; in the last case the
// response body is closed here before returning.
func makeRequest(client *http.Client, u string) (*http.Response, error) {
	req, err := http.NewRequest("GET", u, nil)
	if err != nil {
		// req is nil here; touching req.Header would panic.
		return nil, err
	}

	// sciencedirect and company block atypical user agents
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0")

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusOK {
		// The response is not handed to the caller, so release it here.
		resp.Body.Close()
		return nil, fmt.Errorf("%q: status code not OK", u)
	}
	return resp, nil
}
+
+// getDOIFromPage returns the parsed DOI from the body of the *http.Response
+// provided
+func getDOIFromPage(resp *http.Response) []byte {
+ defer resp.Body.Close()
+ scanner := bufio.NewScanner(resp.Body)
+ for scanner.Scan() {
+ doi := getDOIFromBytes(scanner.Bytes())
+ if doi != nil {
+ return doi
+ }
+ if err := scanner.Err(); err != nil {
+ return nil
+ }
+ }
+ return nil
+}
+
+// renameFile is an alternative to os.Rename which supports moving files
+// between devices where os.Rename would return an error (cross-device link)
+func renameFile(src string, dst string) (err error) {
+ if src == dst {
+ return nil
+ }
+ err = copyFile(src, dst)
+ if err != nil {
+ return fmt.Errorf("failed to copy source file %s to %s: %s", src, dst, err)
+ }
+ err = os.RemoveAll(src)
+ if err != nil {
+ return fmt.Errorf("failed to cleanup source file %s: %s", src, err)
+ }
+ return nil
+}
+
// copyFile copies the file located at src to dst, used by renameFile().
//
// dst is created, or truncated when it already exists, the data is flushed
// to disk with Sync, and dst is given the same file mode as src. When src
// and dst already refer to the same file nothing is done and nil is
// returned. The first error encountered is returned; an error from closing
// dst is reported only when nothing failed earlier.
//
// credit: https://gist.github.com/r0l1/92462b38df26839a3ca324697c8cba04
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	// Stat through the open handle so the mode belongs to the file that is
	// actually being read.
	si, err := in.Stat()
	if err != nil {
		return err
	}

	// os.Create truncates dst; if dst is src under another name that would
	// wipe the data before it is read.
	if di, statErr := os.Stat(dst); statErr == nil && os.SameFile(si, di) {
		return nil
	}

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Do not let a Close error hide an earlier failure.
		if cerr := out.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(out, in); err != nil {
		return err
	}
	if err = out.Sync(); err != nil {
		return err
	}
	return os.Chmod(dst, si.Mode())
}
+
+// getMetaFromDOI saves doi.org API data to TempFile and returns its path
+func getMetaFromDOI(client *http.Client, doi []byte) (string, error) {
+ u := "https://doi.org/" + string(doi)
+ req, err := http.NewRequest("GET", u, nil)
+
+ req.Header.Add("Accept", "application/vnd.crossref.unixref+xml;q=1,application/rdf+xml;q=0.5")
+ resp, err := client.Do(req)
+ if err != nil {
+ return "", err
+ }
+
+ if resp.StatusCode != http.StatusOK {
+ return "", fmt.Errorf("%q: status code not OK, DOI invalid?", u)
+ }
+ if resp.Header.Get("Content-Type") != "application/vnd.crossref.unixref+xml" {
+ return "", fmt.Errorf("%q: content-type not application/vnd.crossref.unixref+xml", u)
+ }
+ if err != nil {
+ return "", err
+ }
+
+ // create a temporary file to store XML stream
+ tmpXML, err := ioutil.TempFile("", "tmp-*.meta.xml")
+ if err != nil {
+ return "", err
+ }
+
+ // incrementally save XML data to the temporary file; saves memory using
+ // the filesystem instead of passing around buffers
+ if err := saveRespBody(resp, tmpXML.Name()); err != nil {
+ return "", err
+ }
+ if err := tmpXML.Close(); err != nil {
+ return "", err
+ }
+ return tmpXML.Name(), nil
+}
+
+// getPaper saves makes an outbound request to a remote resource and saves the
+// response body to a temporary file, returning its path, provided the response
+// has the content-type application/pdf
+func getPaper(client *http.Client, u string) (string, error) {
+ req, err := http.NewRequest("GET", u, nil)
+
+ // sci-hub gives us the paper directly (no iframe) if we're on mobile
+ req.Header.Add("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_7 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Mobile/15E148 Safari/604.1")
+
+ resp, err := client.Do(req)
+ if err != nil {
+ return "", err
+ }
+
+ if resp.StatusCode != http.StatusOK {
+ return "", fmt.Errorf("%q: status code not OK", u)
+ }
+ if resp.Header.Get("Content-Type") != "application/pdf" {
+ return "", fmt.Errorf("%q: content-type not application/pdf", u)
+ }
+ tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf")
+ if err != nil {
+ return "", err
+ }
+
+ // write resp.Body (paper data) to tmpPDF
+ if err := saveRespBody(resp, tmpPDF.Name()); err != nil {
+ return "", err
+ }
+ if err := tmpPDF.Close(); err != nil {
+ return "", err
+ }
+ return tmpPDF.Name(), nil
+}
+
+// saveRespBody writes the provided http.Response to path
+func saveRespBody(resp *http.Response, path string) error {
+ out, err := os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer out.Close()
+
+ r := http.MaxBytesReader(nil, resp.Body, MAX_SIZE)
+ _, err = io.Copy(out, r)
+ if err != nil {
+ return err
+ }
+ return nil
+}