doi-to-filename.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

#!/usr/bin/env python3

import argparse
import sys
import urllib.request

try:
    import xml.etree.cElementTree as ET
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up
    # the C accelerator automatically on modern interpreters.
    import xml.etree.ElementTree as ET

def doi_to_filename(doi):
    """Return a filename derived from the metadata registered for ``doi``.

    The DOI is resolved through ``https://doi.org/<doi>`` with an ``Accept``
    header that prefers Crossref unixref XML and falls back to RDF/XML. The
    XML response is then parsed and turned into a filename string.

    Args:
        doi: The DOI to look up; it is interpolated into the URL as-is.

    Returns:
        The filename string built from the XML response.

    Raises:
        ValueError: If the HTTP request fails or the response cannot be
            parsed into a filename. The underlying exception is attached
            as ``__cause__``.
    """
    url = 'https://doi.org/%s' % doi
    headers = {'Accept': 'application/vnd.crossref.unixref+xml;q=1,' +
                         'application/rdf+xml;q=0.5'}
    # doi.org API request
    try:
        resp = request(url, headers)
    except Exception as err:
        # Chain explicitly so the original traceback stays attached as the
        # direct cause instead of an incidental "during handling" context.
        raise ValueError(
            'error making API request; invalid DOI? %s' % err) from err

    # derive filename from XML response
    try:
        xml_root = get_xml_root(resp)
        filename = get_filename_from_xml(xml_root)
    except Exception as err:
        raise ValueError(
            'error parsing XML response; invalid DOI? %s' % err) from err

    return filename

def request(url, headers):
    """Fetch ``url`` with the given request headers and return the body.

    Args:
        url: The URL to open.
        headers: Mapping of header names to values sent with the request.

    Returns:
        The response body decoded as UTF-8 text.
    """
    conn = urllib.request.Request(
        url,
        headers=headers
    )
    # Use the response as a context manager so the underlying connection is
    # closed even if reading or decoding raises.
    with urllib.request.urlopen(conn) as r:
        return r.read().decode('utf-8')

def get_xml_root(resp):
    """Parse the XML text ``resp`` and return its root element."""
    # fromstring() already yields the root element of the parsed document.
    return ET.fromstring(resp)

def get_filename_from_xml(xml_root):
    """Build a filename of the form ``<title> (<year>) - <surnames>``.

    The first ``title`` and ``year`` elements found anywhere below
    ``xml_root`` are used, together with the text of every ``surname``
    element in document order (joined with ``', '``). The year and author
    parts are omitted when the document does not provide them.

    Args:
        xml_root: Root element of the parsed XML response.

    Returns:
        The assembled filename string.

    Raises:
        ValueError: If no non-empty ``title`` element is present.
    """
    # findtext() returns None for a missing element instead of raising
    # AttributeError on ``None.text``, so a missing title reaches the
    # intended ValueError and a missing year is simply omitted.
    title = xml_root.findtext('.//title')
    if not title:
        raise ValueError('title could not be parsed, aborting...')

    year = xml_root.findtext('.//year')
    year = ' (%s)' % year if year else ''

    # Skip surname elements without text; a None entry would break join().
    authors = [a.text for a in xml_root.iter('surname') if a.text]
    authors = ' - ' + ', '.join(authors) if authors else ''

    return title + year + authors