diff options
Diffstat (limited to 'util.go')
-rw-r--r-- | util.go | 237 |
1 files changed, 237 insertions, 0 deletions
@@ -0,0 +1,237 @@ +package main + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "os" + "regexp" +) + +var privateIPBlocks []*net.IPNet + +// isPrivateIP checks to if the provided IP address is a loopback, link-local +// or unique-local address +// +// credit: https://stackoverflow.com/a/50825191 +func isPrivateIP(ip net.IP) bool { + if privateIPBlocks == nil { + for _, cidr := range []string{ + "127.0.0.0/8", // IPv4 loopback + "10.0.0.0/8", // RFC1918 + "172.16.0.0/12", // RFC1918 + "192.168.0.0/16", // RFC1918 + "169.254.0.0/16", // RFC3927 link-local + "::1/128", // IPv6 loopback + "fe80::/10", // IPv6 link-local + "fc00::/7", // IPv6 unique local addr + } { + _, block, err := net.ParseCIDR(cidr) + if err != nil { + panic(fmt.Errorf("parse error on %q: %v", cidr, err)) + } + privateIPBlocks = append(privateIPBlocks, block) + } + } + if ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + for _, block := range privateIPBlocks { + if block.Contains(ip) { + return true + } + } + return false +} + +// getDOIFromBytes returns the DOI parsed from the provided []byte slice +func getDOIFromBytes(b []byte) []byte { + re := regexp.MustCompile(`(10[.][0-9]{4,}[^\s"/<>]*/[^\s"'<>,\{\};:\[\]\?&]+)`) + return re.Find(b) +} + +// makeRequest makes a request to a remote resource using the provided +// *http.Client and returns its *http.Response +func makeRequest(client *http.Client, u string) (*http.Response, error) { + req, err := http.NewRequest("GET", u, nil) + + // sciencedirect and company block atypical user agents + req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0") + + resp, err := client.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("%q: status code not OK", u) + } + return resp, nil +} + +// getDOIFromPage returns the parsed DOI from the body of the *http.Response +// provided +func getDOIFromPage(resp *http.Response) []byte { + defer resp.Body.Close() + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + doi := getDOIFromBytes(scanner.Bytes()) + if doi != nil { + return doi + } + if err := scanner.Err(); err != nil { + return nil + } + } + return nil +} + +// renameFile is an alternative to os.Rename which supports moving files +// between devices where os.Rename would return an error (cross-device link) +func renameFile(src string, dst string) (err error) { + if src == dst { + return nil + } + err = copyFile(src, dst) + if err != nil { + return fmt.Errorf("failed to copy source file %s to %s: %s", src, dst, err) + } + err = os.RemoveAll(src) + if err != nil { + return fmt.Errorf("failed to cleanup source file %s: %s", src, err) + } + return nil +} + +// copyFile copies a file located at src to dst, used by renameFile() +// +// credit: https://gist.github.com/r0l1/92462b38df26839a3ca324697c8cba04 +func copyFile(src, dst string) (err error) { + in, err := os.Open(src) + if err != nil { + return + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return + } + defer func() { + if e := out.Close(); e != nil { + err = e + } + }() + + _, err = io.Copy(out, in) + if err != nil { + return + } + + err = out.Sync() + if err != nil { + return + } + + si, err := os.Stat(src) + if err != nil { + return + } + err = os.Chmod(dst, si.Mode()) + if err != nil { + return + } + + return +} + +// getMetaFromDOI saves doi.org API data to TempFile and returns its path +func getMetaFromDOI(client *http.Client, doi []byte) (string, error) { + u := "https://doi.org/" + string(doi) + req, err := http.NewRequest("GET", u, nil) + + req.Header.Add("Accept", "application/vnd.crossref.unixref+xml;q=1,application/rdf+xml;q=0.5") + resp, err := client.Do(req) + if err != nil { + return "", err + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("%q: status code not OK, DOI invalid?", u) + } + if resp.Header.Get("Content-Type") != "application/vnd.crossref.unixref+xml" { + return "", fmt.Errorf("%q: content-type not application/vnd.crossref.unixref+xml", u) + } + if err != nil { + return "", err + } + + // create a temporary file to store XML stream + tmpXML, err := ioutil.TempFile("", "tmp-*.meta.xml") + if err != nil { + return "", err + } + + // incrementally save XML data to the temporary file; saves memory using + // the filesystem instead of passing around buffers + if err := saveRespBody(resp, tmpXML.Name()); err != nil { + return "", err + } + if err := tmpXML.Close(); err != nil { + return "", err + } + return tmpXML.Name(), nil +} + +// getPaper saves makes an outbound request to a remote resource and saves the +// response body to a temporary file, returning its path, provided the response +// has the content-type application/pdf +func getPaper(client *http.Client, u string) (string, error) { + req, err := http.NewRequest("GET", u, nil) + + // sci-hub gives us the paper directly (no iframe) if we're on mobile + req.Header.Add("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_7 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Mobile/15E148 Safari/604.1") + + resp, err := client.Do(req) + if err != nil { + return "", err + } + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("%q: status code not OK", u) + } + if resp.Header.Get("Content-Type") != "application/pdf" { + return "", fmt.Errorf("%q: content-type not application/pdf", u) + } + tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf") + if err != nil { + return "", err + } + + // write resp.Body (paper data) to tmpPDF + if err := saveRespBody(resp, tmpPDF.Name()); err != nil { + return "", err + } + if err := tmpPDF.Close(); err != nil { + return "", err + } + return tmpPDF.Name(), nil +} + +// saveRespBody writes the provided http.Response to path +func saveRespBody(resp *http.Response, path string) error { + out, err := os.Create(path) + if err != nil { + return err + } + defer out.Close() + + r := http.MaxBytesReader(nil, resp.Body, MAX_SIZE) + _, err = io.Copy(out, r) + if err != nil { + return err + } + return nil +} |