diff options
author | Florian Bruhin <me@the-compiler.org> | 2021-01-04 21:28:56 +0100 |
---|---|---|
committer | Florian Bruhin <me@the-compiler.org> | 2021-01-05 11:19:15 +0100 |
commit | 40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170 (patch) | |
tree | d87fefb806c805ab4e50096d388a94edfca992ac /qutebrowser/utils/urlutils.py | |
parent | f0486432d13cc9563a4866c6cf96af98d0862035 (diff) | |
download | qutebrowser-40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170.tar.gz qutebrowser-40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170.zip |
Remove custom data: URL parsing
Argh. I should've known Python can do this, but I didn't notice (or
forgot?) and wrote code - some 140 lines of it - which was only in this
repository for a couple of hours.
Oh well, good riddance, code which was replaced by a simple
"mimetypes.guess_type(url.toString())". May you be resurrected if we
ever need a proper data: URL parser at a later point.
I guess some lessons have to be learned the hard way...
Diffstat (limited to 'qutebrowser/utils/urlutils.py')
-rw-r--r-- | qutebrowser/utils/urlutils.py | 52 |
1 files changed, 3 insertions, 49 deletions
diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index fa7867d4d..b5afe958c 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -394,52 +394,6 @@ def get_path_if_valid(pathstr: str, return path -def parse_data_url(url: QUrl) -> Tuple[str, str, bytes]: - """Parse a data URL. - - Returns a tuple with: - 1) The media type - 2) Media type parameters (currently without any further parsing) - 3) The (possibly decoded) data - - Based on https://en.wikipedia.org/wiki/Data_URI_scheme - - Possible further inspiration: - https://github.com/scrapy/w3lib/blob/v1.22.0/w3lib/url.py#L324-L384 - """ - ensure_valid(url) - if url.scheme().lower() != 'data': - raise Error(f"URL {url.toDisplayString()} has no data: scheme") - if ',' not in url.path(): - raise Error("Missing comma") - - encoded = url.toEncoded().data() - encoded = encoded[len('data:'):] # strip off scheme - encoded = urllib.parse.unquote_to_bytes(encoded) - encoded_header, data = encoded.split(b',', 1) - - try: - header = encoded_header.decode('ascii') - except UnicodeDecodeError as e: - raise Error(f"Invalid header in {url.toDisplayString()}: {e}") - - b64_suffix = ';base64' - if header.endswith(b64_suffix): - header = header[:-len(b64_suffix)] - data = base64.b64decode(data) - - if ';' in header: - media_type, params = header.split(';', 1) - else: - media_type = header - params = '' - - if not media_type: - media_type = 'text/plain' - - return media_type, params, data - - def filename_from_url(url: QUrl, fallback: str = None) -> Optional[str]: """Get a suitable filename from a URL. 
@@ -454,11 +408,11 @@ def filename_from_url(url: QUrl, fallback: str = None) -> Optional[str]: return fallback if url.scheme().lower() == 'data': - media_type, _params, _data = parse_data_url(url) - if not media_type: + mimetype, _encoding = mimetypes.guess_type(url.toString()) + if not mimetype: return fallback - ext = mimetypes.guess_extension(media_type, strict=False) or '' + ext = mimetypes.guess_extension(mimetype, strict=False) or '' return 'download' + ext pathname = posixpath.basename(url.path()) |