summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Florian Bruhin <me@the-compiler.org> 2021-01-17 16:43:19 +0100
committer: Florian Bruhin <me@the-compiler.org> 2021-01-17 17:02:07 +0100
commit: a3c0a8759ac336ba58c35ba75e0d42404f331446 (patch)
tree: 4c6252197d702cd448469bb58fe30de120a926e9
parent: 3c905f21fd2d6b1074f45bf8b2ad37a4455c447a (diff)
downloadqutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.tar.gz
qutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.zip
rfc6266: Consolidate files
-rw-r--r-- qutebrowser/browser/webkit/http.py 119
-rw-r--r-- qutebrowser/browser/webkit/rfc6266.py 130
-rw-r--r-- qutebrowser/utils/log.py 3
-rw-r--r-- scripts/dev/check_coverage.py 2
-rw-r--r-- tests/unit/browser/webkit/http/test_content_disposition.py 3
-rw-r--r-- tests/unit/browser/webkit/http/test_http.py 41
-rw-r--r-- tests/unit/browser/webkit/http/test_http_hypothesis.py 59
7 files changed, 151 insertions, 206 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index c2860eea1..b96614d0a 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -19,13 +19,118 @@
"""Parsing functions for various HTTP headers."""
-
+import email.headerregistry
+import email.errors
+import dataclasses
import os.path
+from typing import Type
from PyQt5.QtNetwork import QNetworkRequest
-from qutebrowser.utils import log
-from qutebrowser.browser.webkit import rfc6266
+from qutebrowser.utils import log, utils
+
+
+class ContentDispositionError(Exception):
+
+ """Base class for RFC6266 errors."""
+
+
+@dataclasses.dataclass
+class DefectWrapper:
+
+ """Wrapper around an email.errors defect for comparison."""
+
+ error_class: Type[email.errors.MessageDefect]
+ line: str
+
+ def __eq__(self, other):
+ return (
+ isinstance(other, self.error_class)
+ and other.line == self.line # type: ignore[attr-defined]
+ )
+
+
+class ContentDisposition:
+
+ """Records various indications and hints about content disposition.
+
+ These can be used to know if a file should be downloaded or
+ displayed directly, and to hint what filename it should have
+ in the download case.
+ """
+
+ # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix the
+ # attfnboth one, which has a slightly different wording ("duplicate(s) ignored"
+ # instead of "duplicate ignored"), because even if we did ignore that one, it
+ # still wouldn't work properly...
+ _IGNORED_DEFECT = DefectWrapper(
+ email.errors.InvalidHeaderDefect, # type: ignore[attr-defined]
+ 'duplicate parameter name; duplicate ignored'
+ )
+
+ def __init__(self, disposition, params):
+ """Used internally after parsing the header."""
+ self.disposition = disposition
+ self.params = params
+ assert 'filename*' not in self.params # Handled by headerregistry
+
+ @classmethod
+ def parse(cls, value):
+ """Build a ContentDisposition from a raw header value."""
+ # We allow non-ascii here (it will only be parsed inside of qdtext, and
+ # rejected by the grammar if it appears in other places), although parsing
+ # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
+ # value won't get dismissed because of an unrelated ambiguity in the
+ # filename parameter. But it does mean we occasionally give
+ # less-than-certain values for some legacy senders.
+ try:
+ decoded = value.decode('iso-8859-1')
+ except UnicodeDecodeError as e:
+ raise ContentDispositionError(e)
+
+ reg = email.headerregistry.HeaderRegistry()
+
+ try:
+ parsed = reg('Content-Disposition', decoded)
+ except IndexError:
+ # WORKAROUND for https://bugs.python.org/issue37491
+ # Fixed in Python 3.7.5 and 3.8.0.
+ raise ContentDispositionError("Missing closing quote character")
+
+ if parsed.defects:
+ defects = list(parsed.defects)
+ if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap]
+ raise ContentDispositionError(defects)
+
+ assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+ return cls(disposition=parsed.content_disposition, params=parsed.params)
+
+ def filename(self):
+ """The filename from the Content-Disposition header or None.
+
+ On safety:
+
+ The returned value records the intent of the sender.
+
+ You shouldn't use this sender-controlled value as a filesystem path, it
+ can be insecure. Serving files with this filename can be dangerous as
+ well, due to a certain browser using the part after the dot for
+ mime-sniffing. Saving it to a database is fine by itself though.
+ """
+ return self.params.get('filename')
+
+ def is_inline(self):
+ """Return if the file should be handled inline.
+
+ If not, and unless your application supports other dispositions
+ than the standard inline and attachment, it should be handled
+ as an attachment.
+ """
+ return self.disposition in {None, 'inline'}
+
+ def __repr__(self):
+ return utils.get_repr(self, constructor=True,
+ disposition=self.disposition, params=self.params)
def parse_content_disposition(reply):
@@ -47,11 +152,11 @@ def parse_content_disposition(reply):
# os.path.basename later.
try:
value = bytes(reply.rawHeader(content_disposition_header))
- log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
- content_disposition = rfc6266.ContentDisposition.parse(value)
+ log.network.debug("Parsing Content-Disposition: {value!r}")
+ content_disposition = ContentDisposition.parse(value)
filename = content_disposition.filename()
- except rfc6266.Error as e:
- log.rfc6266.error(f"Error while parsing filename: {e}")
+ except ContentDispositionError as e:
+ log.network.error(f"Error while parsing filename: {e}")
else:
is_inline = content_disposition.is_inline()
# Then try to get filename from url
diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py
deleted file mode 100644
index cc5ba31e1..000000000
--- a/qutebrowser/browser/webkit/rfc6266.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
-
-# Copyright 2014-2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
-#
-# This file is part of qutebrowser.
-#
-# qutebrowser is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# qutebrowser is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
-
-"""Parsing for the RFC 6266 (Content-Disposition) header."""
-
-import email.headerregistry
-import email.errors
-import dataclasses
-from typing import Type
-
-from qutebrowser.utils import utils
-
-
-class Error(Exception):
-
- """Base class for RFC6266 errors."""
-
-
-@dataclasses.dataclass
-class DefectWrapper:
-
- """Wrapper around a email.error for comparison."""
-
- error_class: Type[email.errors.MessageDefect]
- line: str
-
- def __eq__(self, other):
- return (
- isinstance(other, self.error_class)
- and other.line == self.line # type: ignore[attr-defined]
- )
-
-
-class ContentDisposition:
-
- """Records various indications and hints about content disposition.
-
- These can be used to know if a file should be downloaded or
- displayed directly, and to hint what filename it should have
- in the download case.
- """
-
- # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth
- # one which has a slightly different wording ("duplicate(s) ignored" instead of
- # "duplicate ignored"), because even if we did ignore that one, it still wouldn't
- # work properly...
- _IGNORED_DEFECT = DefectWrapper(
- email.errors.InvalidHeaderDefect, # type: ignore[attr-defined]
- 'duplicate parameter name; duplicate ignored'
- )
-
- def __init__(self, disposition, params):
- """Used internally after parsing the header."""
- self.disposition = disposition
- self.params = params
- assert 'filename*' not in self.params # Handled by headerregistry
-
- @classmethod
- def parse(cls, value):
- """Build a _ContentDisposition from header values."""
- # We allow non-ascii here (it will only be parsed inside of qdtext, and
- # rejected by the grammar if it appears in other places), although parsing
- # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
- # value won't get dismissed because of an unrelated ambiguity in the
- # filename parameter. But it does mean we occasionally give
- # less-than-certain values for some legacy senders.
- try:
- decoded = value.decode('iso-8859-1')
- except UnicodeDecodeError as e:
- raise Error(e)
-
- reg = email.headerregistry.HeaderRegistry()
-
- try:
- parsed = reg('Content-Disposition', decoded)
- except IndexError:
- # WORKAROUND for https://bugs.python.org/issue37491
- # Fixed in Python 3.7.5 and 3.8.0.
- raise Error("Missing closing quote character")
-
- if parsed.defects:
- defects = list(parsed.defects)
- if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap]
- raise Error(defects)
-
- assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
- return cls(disposition=parsed.content_disposition, params=parsed.params)
-
- def filename(self):
- """The filename from the Content-Disposition header or None.
-
- On safety:
-
- This property records the intent of the sender.
-
- You shouldn't use this sender-controlled value as a filesystem path, it
- can be insecure. Serving files with this filename can be dangerous as
- well, due to a certain browser using the part after the dot for
- mime-sniffing. Saving it to a database is fine by itself though.
- """
- return self.params.get('filename')
-
- def is_inline(self):
- """Return if the file should be handled inline.
-
- If not, and unless your application supports other dispositions
- than the standard inline and attachment, it should be handled
- as an attachment.
- """
- return self.disposition in {None, 'inline'}
-
- def __repr__(self):
- return utils.get_repr(self, constructor=True,
- disposition=self.disposition, params=self.params)
diff --git a/qutebrowser/utils/log.py b/qutebrowser/utils/log.py
index fa6d9beaf..338b370dc 100644
--- a/qutebrowser/utils/log.py
+++ b/qutebrowser/utils/log.py
@@ -134,7 +134,6 @@ keyboard = logging.getLogger('keyboard')
downloads = logging.getLogger('downloads')
js = logging.getLogger('js') # Javascript console messages
qt = logging.getLogger('qt') # Warnings produced by Qt
-rfc6266 = logging.getLogger('rfc6266')
ipc = logging.getLogger('ipc')
shlexer = logging.getLogger('shlexer')
save = logging.getLogger('save')
@@ -153,7 +152,7 @@ LOGGER_NAMES = [
'destroy', 'modes', 'webview', 'misc',
'mouse', 'procs', 'hints', 'keyboard',
'commands', 'signals', 'downloads',
- 'js', 'qt', 'rfc6266', 'ipc', 'shlexer',
+ 'js', 'qt', 'ipc', 'shlexer',
'save', 'message', 'config', 'sessions',
'webelem', 'prompt', 'network', 'sql',
'greasemonkey', 'extensions',
diff --git a/scripts/dev/check_coverage.py b/scripts/dev/check_coverage.py
index 416628c45..1c9bf7c8e 100644
--- a/scripts/dev/check_coverage.py
+++ b/scripts/dev/check_coverage.py
@@ -90,8 +90,6 @@ PERFECT_FILES = [
'qutebrowser/browser/pdfjs.py'),
('tests/unit/browser/webkit/http/test_http.py',
'qutebrowser/browser/webkit/http.py'),
- ('tests/unit/browser/webkit/http/test_content_disposition.py',
- 'qutebrowser/browser/webkit/rfc6266.py'),
# ('tests/unit/browser/webkit/test_webkitelem.py',
# 'qutebrowser/browser/webkit/webkitelem.py'),
# ('tests/unit/browser/webkit/test_webkitelem.py',
diff --git a/tests/unit/browser/webkit/http/test_content_disposition.py b/tests/unit/browser/webkit/http/test_content_disposition.py
index 15d4f3e54..4957c960a 100644
--- a/tests/unit/browser/webkit/http/test_content_disposition.py
+++ b/tests/unit/browser/webkit/http/test_content_disposition.py
@@ -54,8 +54,7 @@ class HeaderChecker:
"""Check if the passed header is ignored."""
reply = self.stubs.FakeNetworkReply(
headers={'Content-Disposition': header})
- with self.caplog.at_level(logging.ERROR, 'rfc6266'):
- # with self.assertLogs(log.rfc6266, logging.ERROR):
+ with self.caplog.at_level(logging.ERROR, 'network'):
cd_inline, cd_filename = http.parse_content_disposition(reply)
assert cd_filename == DEFAULT_NAME
assert cd_inline
diff --git a/tests/unit/browser/webkit/http/test_http.py b/tests/unit/browser/webkit/http/test_http.py
index 4da7c3986..ce1ae9419 100644
--- a/tests/unit/browser/webkit/http/test_http.py
+++ b/tests/unit/browser/webkit/http/test_http.py
@@ -17,13 +17,13 @@
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
-"""Tests for qutebrowser.browser.webkit.http.
+"""Tests for qutebrowser.browser.webkit.http."""
-Note that tests for parse_content_disposition are in their own
-test_content_disposition.py file.
-"""
+import logging
import pytest
+import hypothesis
+from hypothesis import strategies
from PyQt5.QtCore import QUrl
from qutebrowser.browser.webkit import http
@@ -44,6 +44,33 @@ def test_no_content_disposition(stubs, url, expected):
assert filename == expected
+@pytest.mark.parametrize('template', [
+ '{}',
+ 'attachment; filename="{}"',
+ 'inline; {}',
+ 'attachment; {}="foo"',
+ "attachment; filename*=iso-8859-1''{}",
+ 'attachment; filename*={}',
+])
+@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)]))
+def test_parse_content_disposition_hypothesis(caplog, template, stubs, s):
+ """Test parsing headers based on templates which hypothesis completes."""
+ header = template.format(s)
+ reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header})
+ with caplog.at_level(logging.ERROR, 'network'):
+ http.parse_content_disposition(reply)
+
+
+@hypothesis.given(strategies.binary())
+def test_content_disposition_directly_hypothesis(s):
+ """Test rfc6266 parsing directly with binary data."""
+ try:
+ cd = http.ContentDisposition.parse(s)
+ cd.filename()
+ except (SyntaxError, UnicodeDecodeError, http.ContentDispositionError):
+ pass
+
+
@pytest.mark.parametrize('content_type, expected_mimetype, expected_rest', [
(None, None, None),
('image/example', 'image/example', None),
@@ -59,3 +86,9 @@ def test_parse_content_type(stubs, content_type, expected_mimetype,
mimetype, rest = http.parse_content_type(reply)
assert mimetype == expected_mimetype
assert rest == expected_rest
+
+
+@hypothesis.given(strategies.text())
+def test_parse_content_type_hypothesis(stubs, s):
+ reply = stubs.FakeNetworkReply(headers={'Content-Type': s})
+ http.parse_content_type(reply)
diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py
deleted file mode 100644
index d46bfe897..000000000
--- a/tests/unit/browser/webkit/http/test_http_hypothesis.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
-
-# Copyright 2015-2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
-#
-# This file is part of qutebrowser.
-#
-# qutebrowser is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# qutebrowser is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
-
-import logging
-
-import pytest
-import hypothesis
-from hypothesis import strategies
-
-from qutebrowser.browser.webkit import http, rfc6266
-
-
-@pytest.mark.parametrize('template', [
- '{}',
- 'attachment; filename="{}"',
- 'inline; {}',
- 'attachment; {}="foo"',
- "attachment; filename*=iso-8859-1''{}",
- 'attachment; filename*={}',
-])
-@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)]))
-def test_parse_content_disposition(caplog, template, stubs, s):
- """Test parsing headers based on templates which hypothesis completes."""
- header = template.format(s)
- reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header})
- with caplog.at_level(logging.ERROR, 'rfc6266'):
- http.parse_content_disposition(reply)
-
-
-@hypothesis.given(strategies.binary())
-def test_content_disposition_directly(s):
- """Test rfc6266 parsing directly with binary data."""
- try:
- cd = rfc6266.ContentDisposition.parse(s)
- cd.filename()
- except (SyntaxError, UnicodeDecodeError, rfc6266.Error):
- pass
-
-
-@hypothesis.given(strategies.text())
-def test_parse_content_type(stubs, s):
- reply = stubs.FakeNetworkReply(headers={'Content-Type': s})
- http.parse_content_type(reply)