diff options
author | Florian Bruhin <me@the-compiler.org> | 2021-01-04 19:36:42 +0100 |
---|---|---|
committer | Florian Bruhin <me@the-compiler.org> | 2021-01-05 11:19:01 +0100 |
commit | 9ae08c0f159e8c23e5eb24dc499dc806cc255ae6 (patch) | |
tree | d1e6d644140f818b5af9f0c9c7a59514c2d8befa | |
parent | 0a6c488b88fc37c07f6cecaaa03167a89063da74 (diff) | |
download | qutebrowser-9ae08c0f159e8c23e5eb24dc499dc806cc255ae6.tar.gz qutebrowser-9ae08c0f159e8c23e5eb24dc499dc806cc255ae6.zip |
Improve download filenames for data: URLs
With QtWebEngine, downloading a data: URL seems to give us the raw data:
URL as filename, similar to #1214 / #1321 but for QtWebEngine.
With QtWebKit, the logic is now also improved so that we get a proper
extension rather than a "binary blob" filename.
See #1099
-rw-r--r-- | qutebrowser/browser/qtnetworkdownloads.py | 42 | ||||
-rw-r--r-- | qutebrowser/browser/webengine/webenginedownloads.py | 11 | ||||
-rw-r--r-- | tests/end2end/data/data_link.html | 3 | ||||
-rw-r--r-- | tests/end2end/features/downloads.feature | 14 | ||||
-rw-r--r-- | tests/unit/browser/webengine/test_webenginedownloads.py | 48 |
5 files changed, 90 insertions, 28 deletions
diff --git a/qutebrowser/browser/qtnetworkdownloads.py b/qutebrowser/browser/qtnetworkdownloads.py index 0bf165965..3aebbb6dd 100644 --- a/qutebrowser/browser/qtnetworkdownloads.py +++ b/qutebrowser/browser/qtnetworkdownloads.py @@ -465,6 +465,25 @@ class DownloadManager(downloads.AbstractDownloadManager): mhtml.start_download_checked, tab=tab)) message.global_bridge.ask(question, blocking=False) + def _get_suggested_filename(self, request): + """Get the suggested filename for the given request.""" + filename_url = request.url() + if request.url().scheme().lower() == 'data': + # We might be downloading a binary blob embedded on a page or even + # generated dynamically via javascript. If we happen to know where it's + # coming from, we can try to figure out a more sensible name than the base64 + # content of the data. + origin = request.originatingObject() + try: + filename_url = origin.url() + except AttributeError: + # Raised either if origin is None or some object that doesn't + # have its own url. We're probably fine with a default fallback + # based on the data URL then. + pass + + return urlutils.filename_from_url(filename_url, fallback='qutebrowser-download') + def get_request(self, request, *, target=None, suggested_fn=None, **kwargs): """Start a download with a QNetworkRequest. @@ -482,29 +501,8 @@ class DownloadManager(downloads.AbstractDownloadManager): request.setAttribute(QNetworkRequest.CacheLoadControlAttribute, QNetworkRequest.AlwaysNetwork) - if suggested_fn is not None: - pass - elif request.url().scheme().lower() != 'data': - suggested_fn = urlutils.filename_from_url(request.url()) - else: - # We might be downloading a binary blob embedded on a page or even - # generated dynamically via javascript. We try to figure out a more - # sensible name than the base64 content of the data. - origin = request.originatingObject() - try: - origin_url = origin.url() - except AttributeError: - # Raised either if origin is None or some object that doesn't - # have its own url. We're probably fine with a default fallback - # then. - suggested_fn = 'binary blob' - else: - # Use the originating URL as a base for the filename (works - # e.g. for pdf.js). - suggested_fn = urlutils.filename_from_url(origin_url) - if suggested_fn is None: - suggested_fn = 'qutebrowser-download' + suggested_fn = self._get_suggested_filename(request) return self._fetch_request(request, target=target, diff --git a/qutebrowser/browser/webengine/webenginedownloads.py b/qutebrowser/browser/webengine/webenginedownloads.py index ef592c98c..206f7b9ad 100644 --- a/qutebrowser/browser/webengine/webenginedownloads.py +++ b/qutebrowser/browser/webengine/webenginedownloads.py @@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QObject from PyQt5.QtWebEngineWidgets import QWebEngineDownloadItem from qutebrowser.browser import downloads, pdfjs -from qutebrowser.utils import debug, usertypes, message, log, objreg +from qutebrowser.utils import debug, usertypes, message, log, objreg, urlutils class DownloadItem(downloads.AbstractDownloadItem): @@ -249,7 +249,14 @@ class DownloadManager(downloads.AbstractDownloadManager): @pyqtSlot(QWebEngineDownloadItem) def handle_download(self, qt_item): """Start a download coming from a QWebEngineProfile.""" - suggested_filename = _get_suggested_filename(qt_item.path()) + if qt_item.url().scheme().lower() == 'data': + # WORKAROUND for an unknown QtWebEngine bug (?) which gives us base64 data + # as filename. + suggested_filename = urlutils.filename_from_url( + qt_item.url(), fallback='qutebrowser-download') + else: + suggested_filename = _get_suggested_filename(qt_item.path()) + use_pdfjs = pdfjs.should_use_pdfjs(qt_item.mimeType(), qt_item.url()) download = DownloadItem(qt_item, manager=self) diff --git a/tests/end2end/data/data_link.html b/tests/end2end/data/data_link.html index 227a9f2f9..29ecadb68 100644 --- a/tests/end2end/data/data_link.html +++ b/tests/end2end/data/data_link.html @@ -5,6 +5,7 @@ <title>data: link</title> </head> <body> - <a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">download</a> + <a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">plaintext</a> + <a href="data:application/pdf;base64,cXV0ZWJyb3dzZXI=" id="pdf">PDF download</a> </body> </html> diff --git a/tests/end2end/features/downloads.feature b/tests/end2end/features/downloads.feature index df2e10b46..57d6dfb7f 100644 --- a/tests/end2end/features/downloads.feature +++ b/tests/end2end/features/downloads.feature @@ -87,8 +87,18 @@ Feature: Downloading things from a website. When I set downloads.location.suggestion to filename And I set downloads.location.prompt to true And I open data/data_link.html - And I hint with args "links download" and follow a - And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='binary blob' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log + And I hint with args "links download" and follow s + And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log + And I run :leave-mode + Then no crash should happen + + @qtwebkit_skip + Scenario: Downloading a data: link via QtWebEngine (issue 1214) + When I set downloads.location.suggestion to filename + And I set downloads.location.prompt to true + And I open data/data_link.html + And I hint with args "links" and follow s + And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log And I run :leave-mode Then no crash should happen diff --git a/tests/unit/browser/webengine/test_webenginedownloads.py b/tests/unit/browser/webengine/test_webenginedownloads.py index 3749b01f5..98918571b 100644 --- a/tests/unit/browser/webengine/test_webenginedownloads.py +++ b/tests/unit/browser/webengine/test_webenginedownloads.py @@ -18,11 +18,13 @@ # along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. import os.path +import base64 import pytest - pytest.importorskip('PyQt5.QtWebEngineWidgets') +from PyQt5.QtWebEngineWidgets import QWebEngineProfile +from qutebrowser.utils import urlutils from qutebrowser.browser.webengine import webenginedownloads @@ -38,3 +40,47 @@ from qutebrowser.browser.webengine import webenginedownloads ]) def test_get_suggested_filename(path, expected): assert webenginedownloads._get_suggested_filename(path) == expected + + +@pytest.mark.parametrize('with_slash', [True, False]) +def test_data_url_workaround_needed(qapp, qtbot, webengineview, with_slash): + """With data URLs, we get rather weird base64 filenames back from QtWebEngine. + + This test verifies that our workaround for this is still needed, i.e. if we get + those base64-filenames rather than a "download.pdf" like with Chromium. + """ + # https://stackoverflow.com/a/17280876/2085149 + pdf_source = [ + '%PDF-1.0', + '1 0 obj<</Pages 2 0 R>>endobj', + '2 0 obj<</Kids[3 0 R]/Count 1>>endobj', + '3 0 obj<</MediaBox[0 0 3 3]>>endobj', + 'trailer<</Root 1 0 R>>', + ] + + if with_slash: + pdf_source.insert(1, '% ?') # this results in a slash in base64 + + pdf_data = '\n'.join(pdf_source).encode('ascii') + base64_data = base64.b64encode(pdf_data).decode('ascii') + + if with_slash: + assert '/' in base64_data + expected = base64_data.split('/')[1] + else: + assert '/' not in base64_data + expected = 'pdf' # from the mimetype + + def check_item(item): + assert item.mimeType() == 'application/pdf' + assert item.url().scheme() == 'data' + assert os.path.basename(item.path()) == expected + return True + + profile = QWebEngineProfile.defaultProfile() + profile.setParent(qapp) + + url = urlutils.data_url('application/pdf', pdf_data) + + with qtbot.waitSignal(profile.downloadRequested, check_params_cb=check_item): + webengineview.load(url) |