diff options
-rw-r--r-- | lib/util.py | 47 | ||||
-rwxr-xr-x | springer_dl.py | 47 |
2 files changed, 46 insertions, 48 deletions
diff --git a/lib/util.py b/lib/util.py deleted file mode 100644 index 0f6bd81..0000000 --- a/lib/util.py +++ /dev/null @@ -1,47 +0,0 @@ -import re -import urllib.request -from html.parser import HTMLParser - -class Parser(HTMLParser): - def __init__(self, links=None): - HTMLParser.__init__(self) - if links is None: - self.links = [] - else: - self.links = links - self.title = [] - self.current_tag = None - def handle_starttag(self, tag, attrs): - self.current_tag = tag - if tag == 'a': - self.links.append(dict(attrs).get('href')) - def handle_data(self, data): - if self.current_tag == 'title': - self.title.append(data) - -def request(url, headers): - conn = urllib.request.Request( - url, - headers=headers - ) - r = urllib.request.urlopen(conn) - return r - -def download_file(url, headers, dest): - BLOCK = 16 * 1024 - conn = urllib.request.Request( - url, - headers=headers - ) - resp = urllib.request.urlopen(conn) - with open(dest, 'wb') as f: - while True: - chunk = resp.read(BLOCK) - if not chunk: - break - f.write(chunk) - -def sanitize_name(name): - safe = (' ', '.', '_', '-') - name = ''.join(c for c in name if c.isalnum() or c in safe).rstrip() - return name diff --git a/springer_dl.py b/springer_dl.py index f104b17..a147e55 100755 --- a/springer_dl.py +++ b/springer_dl.py @@ -4,7 +4,52 @@ import argparse import os import sys import urllib.parse -from lib.util import Parser, request, download_file, sanitize_name +import urllib.request +from html.parser import HTMLParser + +class Parser(HTMLParser): + def __init__(self, links=None): + HTMLParser.__init__(self) + if links is None: + self.links = [] + else: + self.links = links + self.title = [] + self.current_tag = None + def handle_starttag(self, tag, attrs): + self.current_tag = tag + if tag == 'a': + self.links.append(dict(attrs).get('href')) + def handle_data(self, data): + if self.current_tag == 'title': + self.title.append(data) + +def request(url, headers): + conn = urllib.request.Request( + url, + headers=headers + ) + r = urllib.request.urlopen(conn) + return r + +def download_file(url, headers, dest): + BLOCK = 16 * 1024 + conn = urllib.request.Request( + url, + headers=headers + ) + resp = urllib.request.urlopen(conn) + with open(dest, 'wb') as f: + while True: + chunk = resp.read(BLOCK) + if not chunk: + break + f.write(chunk) + +def sanitize_name(name): + safe = (' ', '.', '_', '-') + name = ''.join(c for c in name if c.isalnum() or c in safe).rstrip() + return name if __name__ == '__main__': desc = 'springer-dl: download the set of books Springer released for free '\ |