From 9ae08c0f159e8c23e5eb24dc499dc806cc255ae6 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Mon, 4 Jan 2021 19:36:42 +0100 Subject: Improve download filenames for data: URLs With QtWebEngine, downloading a data: URL seems to give us the raw data: URL as filename, similar to #1214 / #1321 but for QtWebEngine. With QtWebKit, the logic is now also improved so that we get a proper extension rather than a "binary blob" filename. See #1099 --- qutebrowser/browser/qtnetworkdownloads.py | 42 +++++++++---------- .../browser/webengine/webenginedownloads.py | 11 ++++- tests/end2end/data/data_link.html | 3 +- tests/end2end/features/downloads.feature | 14 ++++++- .../browser/webengine/test_webenginedownloads.py | 48 +++++++++++++++++++++- 5 files changed, 90 insertions(+), 28 deletions(-) diff --git a/qutebrowser/browser/qtnetworkdownloads.py b/qutebrowser/browser/qtnetworkdownloads.py index 0bf165965..3aebbb6dd 100644 --- a/qutebrowser/browser/qtnetworkdownloads.py +++ b/qutebrowser/browser/qtnetworkdownloads.py @@ -465,6 +465,25 @@ class DownloadManager(downloads.AbstractDownloadManager): mhtml.start_download_checked, tab=tab)) message.global_bridge.ask(question, blocking=False) + def _get_suggested_filename(self, request): + """Get the suggested filename for the given request.""" + filename_url = request.url() + if request.url().scheme().lower() == 'data': + # We might be downloading a binary blob embedded on a page or even + # generated dynamically via javascript. If we happen to know where it's + # coming from, we can try to figure out a more sensible name than the base64 + # content of the data. + origin = request.originatingObject() + try: + filename_url = origin.url() + except AttributeError: + # Raised either if origin is None or some object that doesn't + # have its own url. We're probably fine with a default fallback + # based on the data URL then. + pass + + return urlutils.filename_from_url(filename_url, fallback='qutebrowser-download') + def get_request(self, request, *, target=None, suggested_fn=None, **kwargs): """Start a download with a QNetworkRequest. @@ -482,29 +501,8 @@ class DownloadManager(downloads.AbstractDownloadManager): request.setAttribute(QNetworkRequest.CacheLoadControlAttribute, QNetworkRequest.AlwaysNetwork) - if suggested_fn is not None: - pass - elif request.url().scheme().lower() != 'data': - suggested_fn = urlutils.filename_from_url(request.url()) - else: - # We might be downloading a binary blob embedded on a page or even - # generated dynamically via javascript. We try to figure out a more - # sensible name than the base64 content of the data. - origin = request.originatingObject() - try: - origin_url = origin.url() - except AttributeError: - # Raised either if origin is None or some object that doesn't - # have its own url. We're probably fine with a default fallback - # then. - suggested_fn = 'binary blob' - else: - # Use the originating URL as a base for the filename (works - # e.g. for pdf.js). - suggested_fn = urlutils.filename_from_url(origin_url) - if suggested_fn is None: - suggested_fn = 'qutebrowser-download' + suggested_fn = self._get_suggested_filename(request) return self._fetch_request(request, target=target, diff --git a/qutebrowser/browser/webengine/webenginedownloads.py b/qutebrowser/browser/webengine/webenginedownloads.py index ef592c98c..206f7b9ad 100644 --- a/qutebrowser/browser/webengine/webenginedownloads.py +++ b/qutebrowser/browser/webengine/webenginedownloads.py @@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QObject from PyQt5.QtWebEngineWidgets import QWebEngineDownloadItem from qutebrowser.browser import downloads, pdfjs -from qutebrowser.utils import debug, usertypes, message, log, objreg +from qutebrowser.utils import debug, usertypes, message, log, objreg, urlutils class DownloadItem(downloads.AbstractDownloadItem): @@ -249,7 +249,14 @@ class DownloadManager(downloads.AbstractDownloadManager): @pyqtSlot(QWebEngineDownloadItem) def handle_download(self, qt_item): """Start a download coming from a QWebEngineProfile.""" - suggested_filename = _get_suggested_filename(qt_item.path()) + if qt_item.url().scheme().lower() == 'data': + # WORKAROUND for an unknown QtWebEngine bug (?) which gives us base64 data + # as filename. + suggested_filename = urlutils.filename_from_url( + qt_item.url(), fallback='qutebrowser-download') + else: + suggested_filename = _get_suggested_filename(qt_item.path()) + use_pdfjs = pdfjs.should_use_pdfjs(qt_item.mimeType(), qt_item.url()) download = DownloadItem(qt_item, manager=self) diff --git a/tests/end2end/data/data_link.html b/tests/end2end/data/data_link.html index 227a9f2f9..29ecadb68 100644 --- a/tests/end2end/data/data_link.html +++ b/tests/end2end/data/data_link.html @@ -5,6 +5,7 @@ data: link - download + plaintext + PDF download diff --git a/tests/end2end/features/downloads.feature b/tests/end2end/features/downloads.feature index df2e10b46..57d6dfb7f 100644 --- a/tests/end2end/features/downloads.feature +++ b/tests/end2end/features/downloads.feature @@ -87,8 +87,18 @@ Feature: Downloading things from a website. When I set downloads.location.suggestion to filename And I set downloads.location.prompt to true And I open data/data_link.html - And I hint with args "links download" and follow a - And I wait for "Asking question option=None text=* title='Save file to:'>, *" in the log + And I hint with args "links download" and follow s + And I wait for "Asking question option=None text=* title='Save file to:'>, *" in the log + And I run :leave-mode + Then no crash should happen + + @qtwebkit_skip + Scenario: Downloading a data: link via QtWebEngine (issue 1214) + When I set downloads.location.suggestion to filename + And I set downloads.location.prompt to true + And I open data/data_link.html + And I hint with args "links" and follow s + And I wait for "Asking question option=None text=* title='Save file to:'>, *" in the log And I run :leave-mode Then no crash should happen diff --git a/tests/unit/browser/webengine/test_webenginedownloads.py b/tests/unit/browser/webengine/test_webenginedownloads.py index 3749b01f5..98918571b 100644 --- a/tests/unit/browser/webengine/test_webenginedownloads.py +++ b/tests/unit/browser/webengine/test_webenginedownloads.py @@ -18,11 +18,13 @@ # along with qutebrowser. If not, see . import os.path +import base64 import pytest - pytest.importorskip('PyQt5.QtWebEngineWidgets') +from PyQt5.QtWebEngineWidgets import QWebEngineProfile +from qutebrowser.utils import urlutils from qutebrowser.browser.webengine import webenginedownloads @@ -38,3 +40,47 @@ from qutebrowser.browser.webengine import webenginedownloads ]) def test_get_suggested_filename(path, expected): assert webenginedownloads._get_suggested_filename(path) == expected + + +@pytest.mark.parametrize('with_slash', [True, False]) +def test_data_url_workaround_needed(qapp, qtbot, webengineview, with_slash): + """With data URLs, we get rather weird base64 filenames back from QtWebEngine. + + This test verifies that our workaround for this is still needed, i.e. if we get + those base64-filenames rather than a "download.pdf" like with Chromium. + """ + # https://stackoverflow.com/a/17280876/2085149 + pdf_source = [ + '%PDF-1.0', + '1 0 obj<>endobj', + '2 0 obj<>endobj', + '3 0 obj<>endobj', + 'trailer<>', + ] + + if with_slash: + pdf_source.insert(1, '% ?') # this results in a slash in base64 + + pdf_data = '\n'.join(pdf_source).encode('ascii') + base64_data = base64.b64encode(pdf_data).decode('ascii') + + if with_slash: + assert '/' in base64_data + expected = base64_data.split('/')[1] + else: + assert '/' not in base64_data + expected = 'pdf' # from the mimetype + + def check_item(item): + assert item.mimeType() == 'application/pdf' + assert item.url().scheme() == 'data' + assert os.path.basename(item.path()) == expected + return True + + profile = QWebEngineProfile.defaultProfile() + profile.setParent(qapp) + + url = urlutils.data_url('application/pdf', pdf_data) + + with qtbot.waitSignal(profile.downloadRequested, check_params_cb=check_item): + webengineview.load(url) -- cgit v1.2.3-54-g00ecf