diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | crane.go | 27 | ||||
-rw-r--r-- | templates/admin-edit.html | 2 | ||||
-rw-r--r-- | templates/admin.html | 2 | ||||
-rw-r--r-- | templates/index.html | 2 | ||||
-rw-r--r-- | templates/layout.html | 1 | ||||
-rw-r--r-- | util.go | 47 |
7 files changed, 40 insertions, 43 deletions
@@ -33,7 +33,7 @@ Usage of ./crane: -path string Absolute or relative path to papers folder (default "./papers") -sci-hub string - Sci-Hub URL (default "https://sci-hub.se/") + Sci-Hub URL (default "https://sci-hub.hkvisa.net/") -user string Username for /admin/ endpoints (optional) -pass string @@ -30,7 +30,7 @@ const ( var ( client *http.Client - scihubURL string + scihubURL *url.URL host string port uint64 user string @@ -145,7 +145,9 @@ func (papers *Papers) getUniqueName(category string, name string) string { func (papers *Papers) findPapersWalk(path string, info os.FileInfo, err error) error { // skip the papers.Path root directory - if p, _ := filepath.Abs(path); p == papers.Path { + if p, _ := filepath.Abs(path); p == papers.Path || + strings.HasPrefix(filepath.Base(path), ".") { + return nil } @@ -244,8 +246,9 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper, tmpXML.Close() name := getPaperFileNameFromMeta(meta) // doe2020 + + // last-resort if metadata lacking author or publication year if name == "" { - // last-resort condition if metadata lacking author or publication year name = strings.Replace(string(doi), "..", "", -1) name = strings.Replace(string(doi), "/", "", -1) } @@ -253,7 +256,7 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper, // doe2020-(2, 3, 4...) if n already exists in set uniqueName := papers.getUniqueName(category, name) - // if not matching, check if DOIs match (genuine duplicate) + // check if DOIs match (genuine duplicate) if name != uniqueName { key := filepath.Join(category, name+".pdf") papers.RLock() @@ -271,16 +274,13 @@ func (papers *Papers) NewPaperFromDOI(doi []byte, category string) (*Paper, paper.PaperName+".meta.xml") // make outbound request to sci-hub, save paper to temporary location - url := scihubURL + string(doi) - tmpPDF, err := getPaper(client, url) + tmpPDF, err := getPaper(client, scihubURL, string(doi)) defer os.Remove(tmpPDF) if err != nil { // try passing resource URL (from doi.org metadata) to sci-hub instead // (force cache) if meta.Resource != "" { - url = scihubURL + meta.Resource - tmpPDF, err = getPaper(client, url) - if err != nil { + if tmpPDF, err = getPaper(client, scihubURL, meta.Resource); err != nil { return nil, err } } else { @@ -610,7 +610,9 @@ func main() { var papers Papers papers.List = make(map[string]map[string]*Paper) - flag.StringVar(&scihubURL, "sci-hub", "https://sci-hub.se/", "Sci-Hub URL") + var scihub string + + flag.StringVar(&scihub, "sci-hub", "https://sci-hub.hkvisa.net/", "Sci-Hub URL") flag.StringVar(&papers.Path, "path", "./papers", "Absolute or relative path to papers folder") flag.StringVar(&host, "host", "127.0.0.1", "IP address to listen on") @@ -621,8 +623,9 @@ func main() { papers.Path, _ = filepath.Abs(papers.Path) - if !strings.HasSuffix(scihubURL, "/") { - scihubURL = scihubURL + "/" + scihubURL, err = url.Parse(scihub) + if err != nil { + panic(err) } if _, err := os.Stat(papers.Path); os.IsNotExist(err) { os.Mkdir(papers.Path, os.ModePerm) diff --git a/templates/admin-edit.html b/templates/admin-edit.html index d4c9196..972c2c7 100644 --- a/templates/admin-edit.html +++ b/templates/admin-edit.html @@ -30,7 +30,7 @@ <div class="cat-cont"> <div class="cat"> {{ range $category, $paper := .Papers.List }} - <span class="cat">[<a href="#{{ $category }}">{{ $category }}</a>]</span> + <span class="cat"><a href="#{{ $category }}">{{ $category }}</a></span> {{ end }} </div> </div> diff --git a/templates/admin.html b/templates/admin.html index fb3a3d2..2226a83 100644 --- a/templates/admin.html +++ b/templates/admin.html @@ -45,7 +45,7 @@ <div class="cat-cont"> <div class="cat"> {{ range $category, $paper := .Papers.List }} - <span class="cat">[<a href="#{{ $category }}">{{ $category }}</a>]</span> + <span class="cat"><a href="#{{ $category }}">{{ $category }}</a></span> {{ end }} </div> </div> diff --git a/templates/index.html b/templates/index.html index 861410d..727e727 100644 --- a/templates/index.html +++ b/templates/index.html @@ -7,7 +7,7 @@ <div class="cat-cont"> <div class="cat"> {{ range $category, $paper := .Papers.List }} - <span class="cat">[<a href="#{{ $category }}">{{ $category }}</a>]</span> + <span class="cat"><a href="#{{ $category }}">{{ $category }}</a></span> {{ end }} </div> </div> diff --git a/templates/layout.html b/templates/layout.html index a4c648f..9afe143 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -30,6 +30,7 @@ a.permalink { color: var(--ansi3); text-decoration: none; } div.paper { padding-bottom: 1em; } div.cat-cont { } div.cat { justify-content: space-between; display: flex; flex-wrap: wrap; } +div.cat a { margin-right: 1rem; } div.action { padding-bottom: 1em; margin-left: 1em; } span.doi a { text-decoration: none; } span.title a { text-decoration: underline; color: blue; } @@ -206,16 +206,6 @@ func copyFile(src, dst string) (err error) { if err != nil { return } - - si, err := os.Stat(src) - if err != nil { - return - } - err = os.Chmod(dst, si.Mode()) - if err != nil { - return - } - return } @@ -230,6 +220,7 @@ func getMetaFromDOI(client *http.Client, doi []byte) (*Meta, error) { if err != nil { return nil, err } + defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("%q: failed to get metadata", u) @@ -240,6 +231,7 @@ func getMetaFromDOI(client *http.Client, doi []byte) (*Meta, error) { if err != nil { return nil, err } + r := bufio.NewReader(resp.Body) d := xml.NewDecoder(r) @@ -254,35 +246,38 @@ func getMetaFromDOI(client *http.Client, doi []byte) (*Meta, error) { // getPaper saves makes an outbound request to a remote resource and saves the // response body to a temporary file, returning its path, provided the response // has the content-type application/pdf -func getPaper(client *http.Client, u string) (string, error) { +func getPaper(client *http.Client, scihub *url.URL, resource string) (string, error) { - resp, err := makeRequest(client, u) + ref, err := url.Parse(resource) if err != nil { return "", err } + refURL := scihub.ResolveReference(ref) // scihub + resource + + resp, err := makeRequest(client, refURL.String()) + if err != nil { + return "", err + } + defer resp.Body.Close() doc, err := html.Parse(resp.Body) if err != nil { return "", err } - var dl *url.URL + var directLink *url.URL var f func(*html.Node) f = func(n *html.Node) { - if n.Type == html.ElementNode && n.Data == "embed" { + if n.Type == html.ElementNode { for _, a := range n.Attr { if a.Key == "src" { - _u, err := url.Parse(u) - if err != nil { - continue - } _v, err := url.Parse(a.Val) if err != nil { continue } if strings.HasSuffix(_v.Path, "pdf") { - _u.Path = _v.Path - dl = _u + directLink = scihub.ResolveReference(_v) + break } } } @@ -293,25 +288,23 @@ func getPaper(client *http.Client, u string) (string, error) { } f(doc) - if dl == nil || dl.String() == "" { - return "", fmt.Errorf("%q: could not locate PDF direct link", u) + if directLink == nil || directLink.String() == "" { + return "", fmt.Errorf("%q: could not locate PDF link", refURL.String()) } - resp, err = makeRequest(client, dl.String()) + resp, err = makeRequest(client, directLink.String()) if err != nil { return "", err } - + defer resp.Body.Close() if resp.Header.Get("content-type") != "application/pdf" { - return "", fmt.Errorf("%q: parsed PDF direct link not application/pdf", u) + return "", fmt.Errorf("%q: content-type not application/pdf", refURL.String()) } tmpPDF, err := ioutil.TempFile("", "tmp-*.pdf") if err != nil { return "", err } - - // write resp.Body (paper data) to tmpPDF if err := saveRespBody(resp, tmpPDF.Name()); err != nil { return "", err } |