summaryrefslogtreecommitdiff
path: root/qutebrowser/utils/urlutils.py
diff options
context:
space:
mode:
authorFlorian Bruhin <me@the-compiler.org>2021-01-04 21:28:56 +0100
committerFlorian Bruhin <me@the-compiler.org>2021-01-05 11:19:15 +0100
commit40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170 (patch)
treed87fefb806c805ab4e50096d388a94edfca992ac /qutebrowser/utils/urlutils.py
parentf0486432d13cc9563a4866c6cf96af98d0862035 (diff)
downloadqutebrowser-40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170.tar.gz
qutebrowser-40464ebe3df06d88e2f4dc1ffcd7fb8df9e77170.zip
Remove custom data: URL parsing
Argh. I should've known Python can do this, but I didn't notice (or forgot?) and wrote code - some 140 lines of it - which was only in this repository for a couple of hours. Oh well, good riddance, code which was replaced by a simple "mimetypes.guess_type(url.toString())". May you be resurrected if we ever need a proper data: URL parser at a later point. I guess some lessons have to be learned the hard way...
Diffstat (limited to 'qutebrowser/utils/urlutils.py')
-rw-r--r--qutebrowser/utils/urlutils.py52
1 files changed, 3 insertions, 49 deletions
diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py
index fa7867d4d..b5afe958c 100644
--- a/qutebrowser/utils/urlutils.py
+++ b/qutebrowser/utils/urlutils.py
@@ -394,52 +394,6 @@ def get_path_if_valid(pathstr: str,
return path
-def parse_data_url(url: QUrl) -> Tuple[str, str, bytes]:
- """Parse a data URL.
-
- Returns a tuple with:
- 1) The media type
- 2) Media type parameters (currently without any further parsing)
- 3) The (possibly decoded) data
-
- Based on https://en.wikipedia.org/wiki/Data_URI_scheme
-
- Possible further inspiration:
- https://github.com/scrapy/w3lib/blob/v1.22.0/w3lib/url.py#L324-L384
- """
- ensure_valid(url)
- if url.scheme().lower() != 'data':
- raise Error(f"URL {url.toDisplayString()} has no data: scheme")
- if ',' not in url.path():
- raise Error("Missing comma")
-
- encoded = url.toEncoded().data()
- encoded = encoded[len('data:'):] # strip off scheme
- encoded = urllib.parse.unquote_to_bytes(encoded)
- encoded_header, data = encoded.split(b',', 1)
-
- try:
- header = encoded_header.decode('ascii')
- except UnicodeDecodeError as e:
- raise Error(f"Invalid header in {url.toDisplayString()}: {e}")
-
- b64_suffix = ';base64'
- if header.endswith(b64_suffix):
- header = header[:-len(b64_suffix)]
- data = base64.b64decode(data)
-
- if ';' in header:
- media_type, params = header.split(';', 1)
- else:
- media_type = header
- params = ''
-
- if not media_type:
- media_type = 'text/plain'
-
- return media_type, params, data
-
-
def filename_from_url(url: QUrl, fallback: str = None) -> Optional[str]:
"""Get a suitable filename from a URL.
@@ -454,11 +408,11 @@ def filename_from_url(url: QUrl, fallback: str = None) -> Optional[str]:
return fallback
if url.scheme().lower() == 'data':
- media_type, _params, _data = parse_data_url(url)
- if not media_type:
+ mimetype, _encoding = mimetypes.guess_type(url.toString())
+ if not mimetype:
return fallback
- ext = mimetypes.guess_extension(media_type, strict=False) or ''
+ ext = mimetypes.guess_extension(mimetype, strict=False) or ''
return 'download' + ext
pathname = posixpath.basename(url.path())