about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jordan <me@jordan.im> 2020-06-20 19:53:02 -0700
committer Jordan <me@jordan.im> 2020-06-20 19:53:02 -0700
commit 0122010e7028f089d2c0a8ccd37048d5e1e88753 (patch)
tree fad4609f08df77857a8c9dd1e252ef5a4e9a8b3b
parent 7239112a3e9d65dcb370e88b1d754cb77a7d9995 (diff)
download springer-dl-0122010e7028f089d2c0a8ccd37048d5e1e88753.tar.gz
springer-dl-0122010e7028f089d2c0a8ccd37048d5e1e88753.zip
resolve issue #1, sanitize book names
-rw-r--r-- lib/util.py 5
-rwxr-xr-x springer_dl.py 6
2 files changed, 8 insertions, 3 deletions
diff --git a/lib/util.py b/lib/util.py
index 1bb9811..0f6bd81 100644
--- a/lib/util.py
+++ b/lib/util.py
@@ -40,3 +40,8 @@ def download_file(url, headers, dest):
if not chunk:
break
f.write(chunk)
+
+def sanitize_name(name):
+ safe = (' ', '.', '_', '-')
+ name = ''.join(c for c in name if c.isalnum() or c in safe).rstrip()
+ return name
diff --git a/springer_dl.py b/springer_dl.py
index b214250..f104b17 100755
--- a/springer_dl.py
+++ b/springer_dl.py
@@ -4,7 +4,7 @@ import argparse
import os
import sys
import urllib.parse
-from lib.util import Parser, request, download_file
+from lib.util import Parser, request, download_file, sanitize_name
if __name__ == '__main__':
desc = 'springer-dl: download the set of books Springer released for free '\
@@ -43,7 +43,7 @@ if __name__ == '__main__':
links = [urllib.parse.urljoin(end_url, x) + '?javascript-disabled=true' for x in links]
book_title = p.title[0].split(' |')[0]
- book_path = os.path.join(dl_path, book_title)
+ book_path = os.path.join(dl_path, sanitize_name(book_title))
os.makedirs(book_path, exist_ok=True)
if 'epub' in links[1]:
@@ -54,7 +54,7 @@ if __name__ == '__main__':
for link in links:
filename = '%s - %s' % (book_title, isbn) + os.path.splitext(
urllib.parse.urlparse(link).path)[-1]
- filepath = os.path.join(book_path, filename)
+ filepath = os.path.join(book_path, sanitize_name(filename))
if os.path.exists(filepath):
continue
try: