#!/usr/bin/env python3
# Origin: doi-to-filename.py as created by commit
# 64f16b6a7684a4054f46b009b1cb5a0c3751c6dd ("initial commit",
# Jordan, Wed, 9 Mar 2022 07:21:53 +0000).
"""Derive a descriptive filename from the metadata doi.org returns for a DOI.

The resulting name has the form ``Title (Year) - Surname, Surname``; the
year and author parts are omitted when the metadata does not provide them.
"""

# NOTE: sys and argparse are not used by the code below; they are kept
# because they were part of the original import block.
import sys
import argparse
import urllib.request
# xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up
# the C accelerator automatically when it is available.
import xml.etree.ElementTree as ET


def doi_to_filename(doi):
    """Return a filename built from the metadata registered for *doi*.

    Args:
        doi: The DOI to look up; it is appended verbatim to
            ``https://doi.org/``.

    Returns:
        A string of the form ``Title (Year) - Surname, Surname``.

    Raises:
        ValueError: If the HTTP request fails, or if the response cannot
            be parsed into a filename. The underlying exception is chained
            as ``__cause__``.
    """
    url = 'https://doi.org/%s' % doi
    # Content negotiation: prefer the Crossref unixref XML representation.
    headers = {'Accept': 'application/vnd.crossref.unixref+xml;q=1,'
                         'application/rdf+xml;q=0.5'}

    # doi.org API request
    try:
        resp = request(url, headers)
    except Exception as err:
        raise ValueError(
            'error making API request; invalid DOI? %s' % err) from err

    # derive filename from XML response
    try:
        xml_root = get_xml_root(resp)
        filename = get_filename_from_xml(xml_root)
    except Exception as err:
        raise ValueError(
            'error parsing XML response; invalid DOI? %s' % err) from err

    return filename


def request(url, headers):
    """Perform a GET request and return the response body as text.

    Args:
        url: The URL to fetch.
        headers: Mapping of HTTP header names to values.

    Returns:
        The response body decoded as UTF-8.
    """
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode('utf-8')


def get_xml_root(resp):
    """Parse the XML document in the string *resp* and return its root element."""
    return ET.fromstring(resp)


def _element_text(element):
    """Return the full text of *element* with whitespace collapsed.

    Text inside nested child elements (e.g. ``<i>`` within a title) is
    included. Returns ``''`` when *element* is ``None`` or has no text.
    """
    if element is None:
        return ''
    return ' '.join(''.join(element.itertext()).split())


def get_filename_from_xml(xml_root):
    """Build ``Title (Year) - Surname, Surname`` from a metadata XML tree.

    The first ``title`` and ``year`` descendants of *xml_root* are used,
    together with every ``surname`` element in document order.

    NOTE: the result is not sanitised for characters that are invalid in
    file names (such as ``/``); callers must handle that if needed.

    Args:
        xml_root: Root ``Element`` of the parsed metadata document.

    Returns:
        The assembled filename string (without extension).

    Raises:
        ValueError: If no non-empty ``title`` element is found.
    """
    title = _element_text(xml_root.find('.//title'))
    if not title:
        raise ValueError('title could not be parsed, aborting...')

    # Year and authors are optional; each contributes nothing when absent.
    year = _element_text(xml_root.find('.//year'))
    year_part = ' (%s)' % year if year else ''

    surnames = [
        name
        for name in map(_element_text, xml_root.iter('surname'))
        if name
    ]
    authors_part = ' - ' + ', '.join(surnames) if surnames else ''

    return title + year_part + authors_part