summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Bruhin <me@the-compiler.org>2021-01-04 19:36:42 +0100
committerFlorian Bruhin <me@the-compiler.org>2021-01-05 11:19:01 +0100
commit9ae08c0f159e8c23e5eb24dc499dc806cc255ae6 (patch)
treed1e6d644140f818b5af9f0c9c7a59514c2d8befa
parent0a6c488b88fc37c07f6cecaaa03167a89063da74 (diff)
downloadqutebrowser-9ae08c0f159e8c23e5eb24dc499dc806cc255ae6.tar.gz
qutebrowser-9ae08c0f159e8c23e5eb24dc499dc806cc255ae6.zip
Improve download filenames for data: URLs
With QtWebEngine, downloading a data: URL seems to give us the raw data: URL as filename, similar to #1214 / #1321 but for QtWebEngine. With QtWebKit, the logic is now also improved so that we get a proper extension rather than a "binary blob" filename. See #1099
-rw-r--r--qutebrowser/browser/qtnetworkdownloads.py42
-rw-r--r--qutebrowser/browser/webengine/webenginedownloads.py11
-rw-r--r--tests/end2end/data/data_link.html3
-rw-r--r--tests/end2end/features/downloads.feature14
-rw-r--r--tests/unit/browser/webengine/test_webenginedownloads.py48
5 files changed, 90 insertions, 28 deletions
diff --git a/qutebrowser/browser/qtnetworkdownloads.py b/qutebrowser/browser/qtnetworkdownloads.py
index 0bf165965..3aebbb6dd 100644
--- a/qutebrowser/browser/qtnetworkdownloads.py
+++ b/qutebrowser/browser/qtnetworkdownloads.py
@@ -465,6 +465,25 @@ class DownloadManager(downloads.AbstractDownloadManager):
mhtml.start_download_checked, tab=tab))
message.global_bridge.ask(question, blocking=False)
+ def _get_suggested_filename(self, request):
+ """Get the suggested filename for the given request."""
+ filename_url = request.url()
+ if request.url().scheme().lower() == 'data':
+ # We might be downloading a binary blob embedded on a page or even
+ # generated dynamically via javascript. If we happen to know where it's
+ # coming from, we can try to figure out a more sensible name than the base64
+ # content of the data.
+ origin = request.originatingObject()
+ try:
+ filename_url = origin.url()
+ except AttributeError:
+ # Raised either if origin is None or some object that doesn't
+ # have its own url. We're probably fine with a default fallback
+ # based on the data URL then.
+ pass
+
+ return urlutils.filename_from_url(filename_url, fallback='qutebrowser-download')
+
def get_request(self, request, *, target=None,
suggested_fn=None, **kwargs):
"""Start a download with a QNetworkRequest.
@@ -482,29 +501,8 @@ class DownloadManager(downloads.AbstractDownloadManager):
request.setAttribute(QNetworkRequest.CacheLoadControlAttribute,
QNetworkRequest.AlwaysNetwork)
- if suggested_fn is not None:
- pass
- elif request.url().scheme().lower() != 'data':
- suggested_fn = urlutils.filename_from_url(request.url())
- else:
- # We might be downloading a binary blob embedded on a page or even
- # generated dynamically via javascript. We try to figure out a more
- # sensible name than the base64 content of the data.
- origin = request.originatingObject()
- try:
- origin_url = origin.url()
- except AttributeError:
- # Raised either if origin is None or some object that doesn't
- # have its own url. We're probably fine with a default fallback
- # then.
- suggested_fn = 'binary blob'
- else:
- # Use the originating URL as a base for the filename (works
- # e.g. for pdf.js).
- suggested_fn = urlutils.filename_from_url(origin_url)
-
if suggested_fn is None:
- suggested_fn = 'qutebrowser-download'
+ suggested_fn = self._get_suggested_filename(request)
return self._fetch_request(request,
target=target,
diff --git a/qutebrowser/browser/webengine/webenginedownloads.py b/qutebrowser/browser/webengine/webenginedownloads.py
index ef592c98c..206f7b9ad 100644
--- a/qutebrowser/browser/webengine/webenginedownloads.py
+++ b/qutebrowser/browser/webengine/webenginedownloads.py
@@ -27,7 +27,7 @@ from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QObject
from PyQt5.QtWebEngineWidgets import QWebEngineDownloadItem
from qutebrowser.browser import downloads, pdfjs
-from qutebrowser.utils import debug, usertypes, message, log, objreg
+from qutebrowser.utils import debug, usertypes, message, log, objreg, urlutils
class DownloadItem(downloads.AbstractDownloadItem):
@@ -249,7 +249,14 @@ class DownloadManager(downloads.AbstractDownloadManager):
@pyqtSlot(QWebEngineDownloadItem)
def handle_download(self, qt_item):
"""Start a download coming from a QWebEngineProfile."""
- suggested_filename = _get_suggested_filename(qt_item.path())
+ if qt_item.url().scheme().lower() == 'data':
+ # WORKAROUND for an unknown QtWebEngine bug (?) which gives us base64 data
+ # as filename.
+ suggested_filename = urlutils.filename_from_url(
+ qt_item.url(), fallback='qutebrowser-download')
+ else:
+ suggested_filename = _get_suggested_filename(qt_item.path())
+
use_pdfjs = pdfjs.should_use_pdfjs(qt_item.mimeType(), qt_item.url())
download = DownloadItem(qt_item, manager=self)
diff --git a/tests/end2end/data/data_link.html b/tests/end2end/data/data_link.html
index 227a9f2f9..29ecadb68 100644
--- a/tests/end2end/data/data_link.html
+++ b/tests/end2end/data/data_link.html
@@ -5,6 +5,7 @@
<title>data: link</title>
</head>
<body>
- <a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">download</a>
+ <a href="data:;base64,cXV0ZWJyb3dzZXI=" id="link">plaintext</a>
+ <a href="data:application/pdf;base64,cXV0ZWJyb3dzZXI=" id="pdf">PDF download</a>
</body>
</html>
diff --git a/tests/end2end/features/downloads.feature b/tests/end2end/features/downloads.feature
index df2e10b46..57d6dfb7f 100644
--- a/tests/end2end/features/downloads.feature
+++ b/tests/end2end/features/downloads.feature
@@ -87,8 +87,18 @@ Feature: Downloading things from a website.
When I set downloads.location.suggestion to filename
And I set downloads.location.prompt to true
And I open data/data_link.html
- And I hint with args "links download" and follow a
- And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='binary blob' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
+ And I hint with args "links download" and follow s
+ And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
+ And I run :leave-mode
+ Then no crash should happen
+
+ @qtwebkit_skip
+ Scenario: Downloading a data: link via QtWebEngine (issue 1214)
+ When I set downloads.location.suggestion to filename
+ And I set downloads.location.prompt to true
+ And I open data/data_link.html
+ And I hint with args "links" and follow s
+ And I wait for "Asking question <qutebrowser.utils.usertypes.Question default='download.pdf' mode=<PromptMode.download: 5> option=None text=* title='Save file to:'>, *" in the log
And I run :leave-mode
Then no crash should happen
diff --git a/tests/unit/browser/webengine/test_webenginedownloads.py b/tests/unit/browser/webengine/test_webenginedownloads.py
index 3749b01f5..98918571b 100644
--- a/tests/unit/browser/webengine/test_webenginedownloads.py
+++ b/tests/unit/browser/webengine/test_webenginedownloads.py
@@ -18,11 +18,13 @@
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
import os.path
+import base64
import pytest
-
pytest.importorskip('PyQt5.QtWebEngineWidgets')
+from PyQt5.QtWebEngineWidgets import QWebEngineProfile
+from qutebrowser.utils import urlutils
from qutebrowser.browser.webengine import webenginedownloads
@@ -38,3 +40,47 @@ from qutebrowser.browser.webengine import webenginedownloads
])
def test_get_suggested_filename(path, expected):
assert webenginedownloads._get_suggested_filename(path) == expected
+
+
+@pytest.mark.parametrize('with_slash', [True, False])
+def test_data_url_workaround_needed(qapp, qtbot, webengineview, with_slash):
+ """With data URLs, we get rather weird base64 filenames back from QtWebEngine.
+
+ This test verifies that our workaround for this is still needed, i.e. if we get
+ those base64-filenames rather than a "download.pdf" like with Chromium.
+ """
+ # https://stackoverflow.com/a/17280876/2085149
+ pdf_source = [
+ '%PDF-1.0',
+ '1 0 obj<</Pages 2 0 R>>endobj',
+ '2 0 obj<</Kids[3 0 R]/Count 1>>endobj',
+ '3 0 obj<</MediaBox[0 0 3 3]>>endobj',
+ 'trailer<</Root 1 0 R>>',
+ ]
+
+ if with_slash:
+ pdf_source.insert(1, '% ?') # this results in a slash in base64
+
+ pdf_data = '\n'.join(pdf_source).encode('ascii')
+ base64_data = base64.b64encode(pdf_data).decode('ascii')
+
+ if with_slash:
+ assert '/' in base64_data
+ expected = base64_data.split('/')[1]
+ else:
+ assert '/' not in base64_data
+ expected = 'pdf' # from the mimetype
+
+ def check_item(item):
+ assert item.mimeType() == 'application/pdf'
+ assert item.url().scheme() == 'data'
+ assert os.path.basename(item.path()) == expected
+ return True
+
+ profile = QWebEngineProfile.defaultProfile()
+ profile.setParent(qapp)
+
+ url = urlutils.data_url('application/pdf', pdf_data)
+
+ with qtbot.waitSignal(profile.downloadRequested, check_params_cb=check_item):
+ webengineview.load(url)