diff options
Diffstat (limited to 'springer_dl.py')
-rwxr-xr-x | springer_dl.py | 47 |
1 files changed, 46 insertions, 1 deletions
import argparse
import os
import sys
import urllib.parse
import urllib.request
from html.parser import HTMLParser


class Parser(HTMLParser):
    """Collect anchor targets and the document title from an HTML page.

    Attributes:
        links: ``href`` values of every ``<a>`` tag seen so far. When a list
            is passed to the constructor it is appended to in place, so
            several parsers can feed one shared list.
        title: text fragments found inside ``<title>``; join them to get the
            full title.
        current_tag: name of the innermost tag that was just opened, or
            ``None`` when the parser is not directly inside a freshly opened
            tag.
    """

    def __init__(self, links=None):
        super().__init__()
        # A fresh list per instance unless the caller supplies one to share.
        self.links = [] if links is None else links
        self.title = []
        self.current_tag = None

    def handle_starttag(self, tag, attrs):
        """Remember the opened tag and record the ``href`` of anchors."""
        self.current_tag = tag
        if tag != 'a':
            return
        href = dict(attrs).get('href')
        # Anchors such as <a name="top"> have no target; storing None would
        # only force every consumer of `links` to filter it out again.
        if href is not None:
            self.links.append(href)

    def handle_endtag(self, tag):
        """Forget the current tag once any element is closed.

        Without this reset, text that follows ``</title>`` (typically the
        newline before the next tag) would still be collected as title data.
        """
        self.current_tag = None

    def handle_data(self, data):
        """Accumulate text that appears directly inside ``<title>``."""
        if self.current_tag == 'title':
            self.title.append(data)


def request(url, headers):
    """Open *url* with the given request *headers* and return the response.

    The caller owns the returned response object and is responsible for
    closing it (it can be used as a context manager).
    """
    req = urllib.request.Request(url, headers=headers)
    return urllib.request.urlopen(req)


def download_file(url, headers, dest):
    """Stream the resource at *url* into the file *dest*.

    The body is copied in 16 KiB chunks so large files never have to fit in
    memory. The response is always closed, and if the transfer fails part-way
    the truncated *dest* is removed before the error is re-raised, so a later
    run cannot mistake it for a complete download.
    """
    BLOCK = 16 * 1024
    req = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(req) as resp:
        # Open the destination only after the request succeeded, so a failed
        # request does not create or truncate anything on disk.
        out = open(dest, 'wb')
        try:
            with out:
                for chunk in iter(lambda: resp.read(BLOCK), b''):
                    out.write(chunk)
        except BaseException:
            # Best-effort cleanup of the partial file; the original error is
            # what matters to the caller, so a failing remove is ignored.
            try:
                os.remove(dest)
            except OSError:
                pass
            raise


def sanitize_name(name):
    """Return *name* reduced to characters that are safe in a file name.

    Alphanumeric characters plus space, dot, underscore and hyphen are kept;
    everything else is dropped, and trailing whitespace is stripped.
    """
    safe = (' ', '.', '_', '-')
    kept = [c for c in name if c.isalnum() or c in safe]
    return ''.join(kept).rstrip()


# NOTE: the script's ``if __name__ == '__main__':`` entry point starts at this
# point; it continues beyond this excerpt and is therefore not reproduced here.