author     Jordan <me@jordan.im>  2022-08-22 14:48:10 -0700
committer  Jordan <me@jordan.im>  2022-08-22 14:48:10 -0700
commit     4d69cf999805b9bbe72e9ea5168905546208a345 (patch)
tree       85cf13702e023ea9df7b2fb4f92f6b7938277d9a
parent     308422ae43d3332f2dd71ae88ec0158d877eb807 (diff)
download   crane-4d69cf999805b9bbe72e9ea5168905546208a345.tar.gz
           crane-4d69cf999805b9bbe72e9ea5168905546208a345.zip
crane, util: use url.ResolveReference to build resource paths
-rw-r--r--  crane.go  23
-rw-r--r--  util.go   26
2 files changed, 26 insertions, 23 deletions
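
The change replaces ad-hoc string concatenation (scihubURL + doi) with net/url resolution. As a rough illustration of the behaviour the new code relies on, and not code from this commit, here is a small sketch with made-up base and reference values:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	base, _ := url.Parse("https://sci-hub.se/")

	// A relative reference (here, a DOI) is joined onto the base path.
	doi, _ := url.Parse("10.1234/example.doi")
	fmt.Println(base.ResolveReference(doi)) // https://sci-hub.se/10.1234/example.doi

	// An absolute reference ignores the base and resolves to itself
	// (RFC 3986 section 5.2), so a full resource URL is requested directly
	// rather than being appended to the sci-hub base.
	res, _ := url.Parse("https://publisher.example/article/1")
	fmt.Println(base.ResolveReference(res)) // https://publisher.example/article/1
}
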
diff --git a/crane.go b/crane.go
index e3ef9ad..a9d8601 100644
--- a/crane.go
+++ b/crane.go
@@ -30,7 +30,7 @@ const (
var (
client *http.Client
- scihubURL string
+ scihubURL *url.URL
host string
port uint64
user string
@@ -244,8 +244,9 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper,
tmpXML.Close()
name := getPaperFileNameFromMeta(meta) // doe2020
+
+ // last-resort if metadata lacking author or publication year
if name == "" {
- // last-resort condition if metadata lacking author or publication year
name = strings.Replace(string(doi), "..", "", -1)
name = strings.Replace(string(doi), "/", "", -1)
}
@@ -253,7 +254,7 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper,
// doe2020-(2, 3, 4...) if n already exists in set
uniqueName := papers.getUniqueName(category, name)
- // if not matching, check if DOIs match (genuine duplicate)
+ // check if DOIs match (genuine duplicate)
if name != uniqueName {
key := filepath.Join(category, name+".pdf")
papers.RLock()
@@ -271,16 +272,13 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper,
paper.PaperName+".meta.xml")
// make outbound request to sci-hub, save paper to temporary location
- url := scihubURL + string(doi)
- tmpPDF, err := getPaper(client, url)
+ tmpPDF, err := getPaper(client, scihubURL, string(doi))
defer os.Remove(tmpPDF)
if err != nil {
// try passing resource URL (from doi.org metadata) to sci-hub instead
// (force cache)
if meta.Resource != "" {
- url = scihubURL + meta.Resource
- tmpPDF, err = getPaper(client, url)
- if err != nil {
+ if tmpPDF, err = getPaper(client, scihubURL, meta.Resource); err != nil {
return nil, err
}
} else {
@@ -610,7 +608,9 @@ func main() {
var papers Papers
papers.List = make(map[string]map[string]*Paper)
- flag.StringVar(&scihubURL, "sci-hub", "https://sci-hub.se/", "Sci-Hub URL")
+ var scihub string
+
+ flag.StringVar(&scihub, "sci-hub", "https://sci-hub.se/", "Sci-Hub URL")
flag.StringVar(&papers.Path, "path", "./papers",
"Absolute or relative path to papers folder")
flag.StringVar(&host, "host", "127.0.0.1", "IP address to listen on")
@@ -621,8 +621,9 @@ func main() {
papers.Path, _ = filepath.Abs(papers.Path)
- if !strings.HasSuffix(scihubURL, "/") {
- scihubURL = scihubURL + "/"
+ scihubURL, err = url.Parse(scihub)
+ if err != nil {
+ panic(err)
}
if _, err := os.Stat(papers.Path); os.IsNotExist(err) {
os.Mkdir(papers.Path, os.ModePerm)
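
The removed HasSuffix check is no longer needed for the usual case, though one detail of RFC 3986 path merging is worth keeping in mind: a base path without a trailing slash loses its last segment during resolution. Continuing the sketch above, with a hypothetical mirror URL that is not from the commit:

	// no trailing "/": the last path segment is dropped when resolving
	base, _ = url.Parse("https://mirror.example/scihub")
	ref, _ := url.Parse("10.1234/example.doi")
	fmt.Println(base.ResolveReference(ref)) // https://mirror.example/10.1234/example.doi

	// trailing "/": the segment is kept
	base, _ = url.Parse("https://mirror.example/scihub/")
	fmt.Println(base.ResolveReference(ref)) // https://mirror.example/scihub/10.1234/example.doi
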
diff --git a/util.go b/util.go
index 8fa9c5f..035b519 100644
--- a/util.go
+++ b/util.go
@@ -240,6 +240,7 @@ func getMetaFromDOI(client *http.Client, doi []byte) (*Meta, error) {
if err != nil {
return nil, err
}
+
r := bufio.NewReader(resp.Body)
d := xml.NewDecoder(r)
@@ -254,19 +255,24 @@ func getMetaFromDOI(client *http.Client, doi []byte) (*Meta, error) {
// getPaper makes an outbound request to a remote resource and saves the
// response body to a temporary file, returning its path, provided the response
// has the content-type application/pdf
-func getPaper(client *http.Client, u string) (string, error) {
+func getPaper(client *http.Client, scihub *url.URL, resource string) (string, error) {
- resp, err := makeRequest(client, u)
+ ref, err := url.Parse(resource)
if err != nil {
return "", err
}
+ refURL := scihub.ResolveReference(ref) // scihub + resource
+ resp, err := makeRequest(client, refURL.String())
+ if err != nil {
+ return "", err
+ }
doc, err := html.Parse(resp.Body)
if err != nil {
return "", err
}
- var dl *url.URL
+ var directLink *url.URL
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode {
@@ -276,9 +282,8 @@ func getPaper(client *http.Client, u string) (string, error) {
if err != nil {
continue
}
- fmt.Println(_v.Path)
if strings.HasSuffix(_v.Path, "pdf") {
- dl = _v
+ directLink = scihub.ResolveReference(_v)
break
}
}
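
For reference, a consolidated sketch of the traversal in this hunk (assumptions: a depth-first walk where the first attribute value whose path ends in "pdf" wins; imports are net/url, strings, and golang.org/x/net/html; the function name is not from the repository):

// findPDFLink walks the parsed page and returns the first link whose path
// ends in "pdf", resolved against base so relative and protocol-relative
// hrefs become absolute URLs.
func findPDFLink(doc *html.Node, base *url.URL) *url.URL {
	var direct *url.URL
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if direct != nil {
			return
		}
		if n.Type == html.ElementNode {
			for _, attr := range n.Attr {
				ref, err := url.Parse(attr.Val)
				if err != nil {
					continue
				}
				if strings.HasSuffix(ref.Path, "pdf") {
					direct = base.ResolveReference(ref)
					return
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)
	return direct
}
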
@@ -290,25 +295,22 @@ func getPaper(client *http.Client, u string) (string, error) {
}
f(doc)
- if dl == nil || dl.String() == "" {
- return "", fmt.Errorf("%q: could not locate PDF direct link", u)
+ if directLink == nil || directLink.String() == "" {
+ return "", fmt.Errorf("%q: could not locate PDF link", refURL.String())
}
- resp, err = makeRequest(client, dl.String())
+ resp, err = makeRequest(client, directLink.String())
if err != nil {
return "", err
}
-
if resp.Header.Get("content-type") != "application/pdf" {
- return "", fmt.Errorf("%q: parsed PDF direct link not application/pdf", u)
+ return "", fmt.Errorf("%q: content-type not application/pdf", refURL.String())
}
tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf")
if err != nil {
return "", err
}
-
- // write resp.Body (paper data) to tmpPDF
if err := saveRespBody(resp, tmpPDF.Name()); err != nil {
return "", err
}
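
Put together, the call site in NewPaperFromDOI now looks roughly like the following condensed sketch (not a verbatim excerpt; cleanup and error handling trimmed):

	// base URL is parsed once from the -sci-hub flag in main
	scihubURL, err := url.Parse("https://sci-hub.se/")
	if err != nil {
		panic(err)
	}

	// try the DOI first, then fall back to the resource URL from doi.org metadata
	tmpPDF, err := getPaper(client, scihubURL, string(doi))
	if err != nil && meta.Resource != "" {
		tmpPDF, err = getPaper(client, scihubURL, meta.Resource)
	}
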