diff options
author | Jordan <me@jordan.im> | 2020-06-20 19:53:02 -0700 |
---|---|---|
committer | Jordan <me@jordan.im> | 2020-06-20 19:53:02 -0700 |
commit | 0122010e7028f089d2c0a8ccd37048d5e1e88753 (patch) | |
tree | fad4609f08df77857a8c9dd1e252ef5a4e9a8b3b | |
parent | 7239112a3e9d65dcb370e88b1d754cb77a7d9995 (diff) | |
download | springer-dl-0122010e7028f089d2c0a8ccd37048d5e1e88753.tar.gz springer-dl-0122010e7028f089d2c0a8ccd37048d5e1e88753.zip |
resolve issue #1, sanitize book names
-rw-r--r-- | lib/util.py | 5 | ||||
-rwxr-xr-x | springer_dl.py | 6 |
2 files changed, 8 insertions, 3 deletions
```diff
diff --git a/lib/util.py b/lib/util.py
index 1bb9811..0f6bd81 100644
--- a/lib/util.py
+++ b/lib/util.py
@@ -40,3 +40,8 @@ def download_file(url, headers, dest):
                 if not chunk:
                     break
                 f.write(chunk)
+
+def sanitize_name(name):
+    safe = (' ', '.', '_', '-')
+    name = ''.join(c for c in name if c.isalnum() or c in safe).rstrip()
+    return name
diff --git a/springer_dl.py b/springer_dl.py
index b214250..f104b17 100755
--- a/springer_dl.py
+++ b/springer_dl.py
@@ -4,7 +4,7 @@ import argparse
 import os
 import sys
 import urllib.parse
-from lib.util import Parser, request, download_file
+from lib.util import Parser, request, download_file, sanitize_name
 
 if __name__ == '__main__':
     desc = 'springer-dl: download the set of books Springer released for free '\
@@ -43,7 +43,7 @@ if __name__ == '__main__':
         links = [urllib.parse.urljoin(end_url, x) + '?javascript-disabled=true'
                 for x in links]
         book_title = p.title[0].split(' |')[0]
-        book_path = os.path.join(dl_path, book_title)
+        book_path = os.path.join(dl_path, sanitize_name(book_title))
         os.makedirs(book_path, exist_ok=True)
 
         if 'epub' in links[1]:
@@ -54,7 +54,7 @@ if __name__ == '__main__':
         for link in links:
             filename = '%s - %s' % (book_title, isbn) + os.path.splitext(
                     urllib.parse.urlparse(link).path)[-1]
-            filepath = os.path.join(book_path, filename)
+            filepath = os.path.join(book_path, sanitize_name(filename))
             if os.path.exists(filepath):
                 continue
             try:
```