summaryrefslogtreecommitdiff
path: root/misc/userscripts/getbib
blob: 0ab0ba54dcc8775fd56a5533ee86e578aa50af90 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python3
"""Qutebrowser userscript scraping the current web page for DOIs and downloading
corresponding bibtex information.

Set the environment variable 'QUTE_BIB_FILEPATH' to the path of the file that
bibtex entries should be appended to. Otherwise, entries are appended to
'/tmp/qute.bib' and are hence typically deleted at reboot.

Installation: see qute://help/userscripts.html

Inspired by
https://ocefpaf.github.io/python4oceanographers/blog/2014/05/19/doi2bibtex/
"""

import os
import re
import sys
from collections import Counter
from urllib import error as url_error
from urllib import parse as url_parse
from urllib import request as url_request


# Path of the FIFO on which qutebrowser accepts commands; None when the
# QUTE_FIFO environment variable is not set.
FIFO_PATH = os.getenv("QUTE_FIFO")


def message_fifo(message, level="warning"):
    """Show *message* in qutebrowser by writing a command to its FIFO.

    The level must be one of 'info', 'warning' (default) or 'error'; it
    selects the ``message-<level>`` command that is written.

    If ``FIFO_PATH`` is None the message is printed to stderr instead, so the
    script does not die with a ``TypeError`` from ``open(None)``.
    """
    if FIFO_PATH is None:
        print("{}: {}".format(level, message), file=sys.stderr)
        return
    # NOTE(review): the message is wrapped in single quotes without any
    # escaping, so a message that itself contains "'" may be displayed
    # garbled -- confirm against qutebrowser's command quoting rules.
    command = "message-{} '{}'".format(level, message)
    with open(FIFO_PATH, "w") as fifo:
        # Terminate the command with a newline so that consecutive messages
        # do not run together on the FIFO.
        fifo.write(command + "\n")


# Matches DOI-like tokens: "10.", digits, "/", then everything up to the next
# whitespace, double quote, angle bracket or parenthesis.
# The pattern from https://stackoverflow.com/a/10324802/3865876 is too strict:
# r'\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b'
DOI_PATTERN = re.compile(r'10\.\d+/[^()\s<>"]+')


def select_doi(text):
    """Return ``(doi, n_distinct)`` for the most frequent DOI in *text*.

    ``n_distinct`` is the number of different DOIs found. When several DOIs
    are equally frequent, the one appearing first in *text* wins. Returns
    ``(None, 0)`` if *text* contains no DOI at all.
    """
    counts = Counter(DOI_PATTERN.findall(text))
    if not counts:
        return None, 0
    return counts.most_common(1)[0][0], len(counts)


def fetch_bibtex(doi):
    """Request the bibtex entry for *doi* from dx.doi.org and return it.

    The response body is decoded as UTF-8 and returned unformatted.

    Raises ``urllib.error.HTTPError`` if the server answers with an error
    status and ``urllib.error.URLError`` if the request cannot be made.
    """
    url = "https://dx.doi.org/" + url_parse.quote(doi)
    headers = {"Accept": "text/bibliography; style=bibtex"}
    request = url_request.Request(url, headers=headers)
    # The context manager closes the connection once the body has been read.
    with url_request.urlopen(request) as response:
        return response.read().decode("utf-8")


def format_bibtex(raw):
    """Return the bibtex entry *raw* reflowed to one field per line.

    The first space (after the citation key) and every "}, " field separator
    become a line break followed by four spaces of indentation; the closing
    brace of the entry is moved onto its own line.
    """
    entry = raw.strip()
    entry = entry.replace(" ", "\n    ", 1)
    entry = entry.replace("}, ", "},\n    ")
    return entry.replace("}}", "}\n}")


def main():
    """Scrape the current page for DOIs and append the bibtex of the most
    frequent one to the bibliography file.

    The page text is read from the file named by QUTE_TEXT; the entry is
    appended to the file named by QUTE_BIB_FILEPATH ('/tmp/qute.bib' if
    unset). Progress and problems are reported through ``message_fifo``.
    """
    source = os.getenv("QUTE_TEXT")
    if source is None:
        # Reported on stderr rather than through the FIFO so that this check
        # works even when the script is not run from qutebrowser.
        sys.exit("QUTE_TEXT is not set, run this script as a qutebrowser "
                 "userscript")
    with open(source, encoding="utf-8") as f:
        text = f.read()

    # find DOIs on page using regex
    doi, n_found = select_doi(text)
    if doi is None:
        message_fifo("No DOIs found on page")
        sys.exit()
    message_fifo("Found {} DOIs on page, selecting {}".format(n_found, doi),
                 level="info")

    # get bibtex data corresponding to DOI; urlopen raises for error statuses
    # instead of returning them, so they have to be caught here
    try:
        raw = fetch_bibtex(doi)
    except url_error.HTTPError as err:
        message_fifo("Request returned {}".format(err.code))
        sys.exit()
    except url_error.URLError as err:
        message_fifo("Request failed: {}".format(err.reason))
        sys.exit()

    # append to file
    bib_filepath = os.getenv("QUTE_BIB_FILEPATH", "/tmp/qute.bib")
    with open(bib_filepath, "a", encoding="utf-8") as f:
        f.write(format_bibtex(raw) + "\n\n")


if __name__ == "__main__":
    main()