rfc6266: Consolidate files

author: Florian Bruhin <me@the-compiler.org> 2021-01-17 16:43:19 +0100
committer: Florian Bruhin <me@the-compiler.org> 2021-01-17 17:02:07 +0100
commit: a3c0a8759ac336ba58c35ba75e0d42404f331446 (patch)
tree: 4c6252197d702cd448469bb58fe30de120a926e9
parent: 3c905f21fd2d6b1074f45bf8b2ad37a4455c447a (diff)
download: qutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.tar.gz
qutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.zip
7 files changed, 151 insertions, 206 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index c2860eea1..b96614d0a 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -19,13 +19,118 @@
 
 """Parsing functions for various HTTP headers."""
 
-
+import email.headerregistry
+import email.errors
+import dataclasses
 import os.path
+from typing import Type
 
 from PyQt5.QtNetwork import QNetworkRequest
 
-from qutebrowser.utils import log
-from qutebrowser.browser.webkit import rfc6266
+from qutebrowser.utils import log, utils
+
+
+class ContentDispositionError(Exception):
+
+    """Base class for RFC6266 errors."""
+
+
+@dataclasses.dataclass
+class DefectWrapper:
+
+    """Wrapper around a email.error for comparison."""
+
+    error_class: Type[email.errors.MessageDefect]
+    line: str
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, self.error_class)
+            and other.line == self.line  # type: ignore[attr-defined]
+        )
+
+
+class ContentDisposition:
+
+    """Records various indications and hints about content disposition.
+
+    These can be used to know if a file should be downloaded or
+    displayed directly, and to hint what filename it should have
+    in the download case.
+    """
+
+    # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth
+    # one which has a slightly different wording ("duplicate(s) ignored" instead of
+    # "duplicate ignored"), because even if we did ignore that one, it still wouldn't
+    # work properly...
+    _IGNORED_DEFECT = DefectWrapper(
+        email.errors.InvalidHeaderDefect,  # type: ignore[attr-defined]
+        'duplicate parameter name; duplicate ignored'
+    )
+
+    def __init__(self, disposition, params):
+        """Used internally after parsing the header."""
+        self.disposition = disposition
+        self.params = params
+        assert 'filename*' not in self.params  # Handled by headerregistry
+
+    @classmethod
+    def parse(cls, value):
+        """Build a _ContentDisposition from header values."""
+        # We allow non-ascii here (it will only be parsed inside of qdtext, and
+        # rejected by the grammar if it appears in other places), although parsing
+        # it can be ambiguous.  Parsing it ensures that a non-ambiguous filename*
+        # value won't get dismissed because of an unrelated ambiguity in the
+        # filename parameter. But it does mean we occasionally give
+        # less-than-certain values for some legacy senders.
+        try:
+            decoded = value.decode('iso-8859-1')
+        except UnicodeDecodeError as e:
+            raise ContentDispositionError(e)
+
+        reg = email.headerregistry.HeaderRegistry()
+
+        try:
+            parsed = reg('Content-Disposition', decoded)
+        except IndexError:
+            # WORKAROUND for https://bugs.python.org/issue37491
+            # Fixed in Python 3.7.5 and 3.8.0.
+            raise ContentDispositionError("Missing closing quote character")
+
+        if parsed.defects:
+            defects = list(parsed.defects)
+            if defects != [cls._IGNORED_DEFECT]:  # type: ignore[comparison-overlap]
+                raise ContentDispositionError(defects)
+
+        assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+        return cls(disposition=parsed.content_disposition, params=parsed.params)
+
+    def filename(self):
+        """The filename from the Content-Disposition header or None.
+
+        On safety:
+
+        This property records the intent of the sender.
+
+        You shouldn't use this sender-controlled value as a filesystem path, it
+        can be insecure. Serving files with this filename can be dangerous as
+        well, due to a certain browser using the part after the dot for
+        mime-sniffing.  Saving it to a database is fine by itself though.
+        """
+        return self.params.get('filename')
+
+    def is_inline(self):
+        """Return if the file should be handled inline.
+
+        If not, and unless your application supports other dispositions
+        than the standard inline and attachment, it should be handled
+        as an attachment.
+        """
+        return self.disposition in {None, 'inline'}
+
+    def __repr__(self):
+        return utils.get_repr(self, constructor=True,
+                              disposition=self.disposition, params=self.params)
 
 
 def parse_content_disposition(reply):
@@ -47,11 +152,11 @@ def parse_content_disposition(reply):
         # os.path.basename later.
         try:
             value = bytes(reply.rawHeader(content_disposition_header))
-            log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
-            content_disposition = rfc6266.ContentDisposition.parse(value)
+            log.network.debug("Parsing Content-Disposition: {value!r}")
+            content_disposition = ContentDisposition.parse(value)
             filename = content_disposition.filename()
-        except rfc6266.Error as e:
-            log.rfc6266.error(f"Error while parsing filename: {e}")
+        except ContentDispositionError as e:
+            log.network.error(f"Error while parsing filename: {e}")
         else:
             is_inline = content_disposition.is_inline()
     # Then try to get filename from url
diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py
deleted file mode 100644
index cc5ba31e1..000000000
--- a/qutebrowser/browser/webkit/rfc6266.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
-
-# Copyright 2014-2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
-#
-# This file is part of qutebrowser.
-#
-# qutebrowser is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# qutebrowser is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
-
-"""Parsing for the RFC 6266 (Content-Disposition) header."""
-
-import email.headerregistry
-import email.errors
-import dataclasses
-from typing import Type
-
-from qutebrowser.utils import utils
-
-
-class Error(Exception):
-
-    """Base class for RFC6266 errors."""
-
-
-@dataclasses.dataclass
-class DefectWrapper:
-
-    """Wrapper around a email.error for comparison."""
-
-    error_class: Type[email.errors.MessageDefect]
-    line: str
-
-    def __eq__(self, other):
-        return (
-            isinstance(other, self.error_class)
-            and other.line == self.line  # type: ignore[attr-defined]
-        )
-
-
-class ContentDisposition:
-
-    """Records various indications and hints about content disposition.
-
-    These can be used to know if a file should be downloaded or
-    displayed directly, and to hint what filename it should have
-    in the download case.
-    """
-
-    # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth
-    # one which has a slightly different wording ("duplicate(s) ignored" instead of
-    # "duplicate ignored"), because even if we did ignore that one, it still wouldn't
-    # work properly...
-    _IGNORED_DEFECT = DefectWrapper(
-        email.errors.InvalidHeaderDefect,  # type: ignore[attr-defined]
-        'duplicate parameter name; duplicate ignored'
-    )
-
-    def __init__(self, disposition, params):
-        """Used internally after parsing the header."""
-        self.disposition = disposition
-        self.params = params
-        assert 'filename*' not in self.params  # Handled by headerregistry
-
-    @classmethod
-    def parse(cls, value):
-        """Build a _ContentDisposition from header values."""
-        # We allow non-ascii here (it will only be parsed inside of qdtext, and
-        # rejected by the grammar if it appears in other places), although parsing
-        # it can be ambiguous.  Parsing it ensures that a non-ambiguous filename*
-        # value won't get dismissed because of an unrelated ambiguity in the
-        # filename parameter. But it does mean we occasionally give
-        # less-than-certain values for some legacy senders.
-        try:
-           decoded = value.decode('iso-8859-1')
-        except UnicodeDecodeError as e:
-            raise Error(e)
-
-        reg = email.headerregistry.HeaderRegistry()
-
-        try:
-            parsed = reg('Content-Disposition', decoded)
-        except IndexError:
-            # WORKAROUND for https://bugs.python.org/issue37491
-            # Fixed in Python 3.7.5 and 3.8.0.
-            raise Error("Missing closing quote character")
-
-        if parsed.defects:
-            defects = list(parsed.defects)
-            if defects != [cls._IGNORED_DEFECT]:  # type: ignore[comparison-overlap]
-                raise Error(defects)
-
-        assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
-        return cls(disposition=parsed.content_disposition, params=parsed.params)
-
-    def filename(self):
-        """The filename from the Content-Disposition header or None.
-
-        On safety:
-
-        This property records the intent of the sender.
-
-        You shouldn't use this sender-controlled value as a filesystem path, it
-        can be insecure. Serving files with this filename can be dangerous as
-        well, due to a certain browser using the part after the dot for
-        mime-sniffing.  Saving it to a database is fine by itself though.
-        """
-        return self.params.get('filename')
-
-    def is_inline(self):
-        """Return if the file should be handled inline.
-
-        If not, and unless your application supports other dispositions
-        than the standard inline and attachment, it should be handled
-        as an attachment.
-        """
-        return self.disposition in {None, 'inline'}
-
-    def __repr__(self):
-        return utils.get_repr(self, constructor=True,
-                              disposition=self.disposition, params=self.params)
diff --git a/qutebrowser/utils/log.py b/qutebrowser/utils/log.py
index fa6d9beaf..338b370dc 100644
--- a/qutebrowser/utils/log.py
+++ b/qutebrowser/utils/log.py
@@ -134,7 +134,6 @@ keyboard = logging.getLogger('keyboard')
 downloads = logging.getLogger('downloads')
 js = logging.getLogger('js')  # Javascript console messages
 qt = logging.getLogger('qt')  # Warnings produced by Qt
-rfc6266 = logging.getLogger('rfc6266')
 ipc = logging.getLogger('ipc')
 shlexer = logging.getLogger('shlexer')
 save = logging.getLogger('save')
@@ -153,7 +152,7 @@ LOGGER_NAMES = [
     'destroy', 'modes', 'webview', 'misc',
     'mouse', 'procs', 'hints', 'keyboard',
     'commands', 'signals', 'downloads',
-    'js', 'qt', 'rfc6266', 'ipc', 'shlexer',
+    'js', 'qt', 'ipc', 'shlexer',
     'save', 'message', 'config', 'sessions',
     'webelem', 'prompt', 'network', 'sql',
     'greasemonkey', 'extensions',
diff --git a/scripts/dev/check_coverage.py b/scripts/dev/check_coverage.py
index 416628c45..1c9bf7c8e 100644
--- a/scripts/dev/check_coverage.py
+++ b/scripts/dev/check_coverage.py
@@ -90,8 +90,6 @@ PERFECT_FILES = [
      'qutebrowser/browser/pdfjs.py'),
     ('tests/unit/browser/webkit/http/test_http.py',
      'qutebrowser/browser/webkit/http.py'),
-    ('tests/unit/browser/webkit/http/test_content_disposition.py',
-     'qutebrowser/browser/webkit/rfc6266.py'),
     # ('tests/unit/browser/webkit/test_webkitelem.py',
     #  'qutebrowser/browser/webkit/webkitelem.py'),
     # ('tests/unit/browser/webkit/test_webkitelem.py',
diff --git a/tests/unit/browser/webkit/http/test_content_disposition.py b/tests/unit/browser/webkit/http/test_content_disposition.py
index 15d4f3e54..4957c960a 100644
--- a/tests/unit/browser/webkit/http/test_content_disposition.py
+++ b/tests/unit/browser/webkit/http/test_content_disposition.py
@@ -54,8 +54,7 @@ class HeaderChecker:
         """Check if the passed header is ignored."""
         reply = self.stubs.FakeNetworkReply(
             headers={'Content-Disposition': header})
-        with self.caplog.at_level(logging.ERROR, 'rfc6266'):
-            # with self.assertLogs(log.rfc6266, logging.ERROR):
+        with self.caplog.at_level(logging.ERROR, 'network'):
             cd_inline, cd_filename = http.parse_content_disposition(reply)
         assert cd_filename == DEFAULT_NAME
         assert cd_inline
diff --git a/tests/unit/browser/webkit/http/test_http.py b/tests/unit/browser/webkit/http/test_http.py
index 4da7c3986..ce1ae9419 100644
--- a/tests/unit/browser/webkit/http/test_http.py
+++ b/tests/unit/browser/webkit/http/test_http.py
@@ -17,13 +17,13 @@
 # You should have received a copy of the GNU General Public License
 # along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
 
-"""Tests for qutebrowser.browser.webkit.http.
+"""Tests for qutebrowser.browser.webkit.http."""
 
-Note that tests for parse_content_disposition are in their own
-test_content_disposition.py file.
-"""
+import logging
 
 import pytest
+import hypothesis
+from hypothesis import strategies
 from PyQt5.QtCore import QUrl
 
 from qutebrowser.browser.webkit import http
@@ -44,6 +44,33 @@ def test_no_content_disposition(stubs, url, expected):
     assert filename == expected
 
 
+@pytest.mark.parametrize('template', [
+    '{}',
+    'attachment; filename="{}"',
+    'inline; {}',
+    'attachment; {}="foo"',
+    "attachment; filename*=iso-8859-1''{}",
+    'attachment; filename*={}',
+])
+@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)]))
+def test_parse_content_disposition_hypothesis(caplog, template, stubs, s):
+    """Test parsing headers based on templates which hypothesis completes."""
+    header = template.format(s)
+    reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header})
+    with caplog.at_level(logging.ERROR, 'network'):
+        http.parse_content_disposition(reply)
+
+
+@hypothesis.given(strategies.binary())
+def test_content_disposition_directly_hypothesis(s):
+    """Test rfc6266 parsing directly with binary data."""
+    try:
+        cd = http.ContentDisposition.parse(s)
+        cd.filename()
+    except (SyntaxError, UnicodeDecodeError, http.ContentDispositionError):
+        pass
+
+
 @pytest.mark.parametrize('content_type, expected_mimetype, expected_rest', [
     (None, None, None),
     ('image/example', 'image/example', None),
@@ -59,3 +86,9 @@ def test_parse_content_type(stubs, content_type, expected_mimetype,
     mimetype, rest = http.parse_content_type(reply)
     assert mimetype == expected_mimetype
     assert rest == expected_rest
+
+
+@hypothesis.given(strategies.text())
+def test_parse_content_type_hypothesis(stubs, s):
+    reply = stubs.FakeNetworkReply(headers={'Content-Type': s})
+    http.parse_content_type(reply)
diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py
deleted file mode 100644
index d46bfe897..000000000
--- a/tests/unit/browser/webkit/http/test_http_hypothesis.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
-
-# Copyright 2015-2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
-#
-# This file is part of qutebrowser.
-#
-# qutebrowser is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# qutebrowser is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with qutebrowser.  If not, see <http://www.gnu.org/licenses/>.
-
-import logging
-
-import pytest
-import hypothesis
-from hypothesis import strategies
-
-from qutebrowser.browser.webkit import http, rfc6266
-
-
-@pytest.mark.parametrize('template', [
-    '{}',
-    'attachment; filename="{}"',
-    'inline; {}',
-    'attachment; {}="foo"',
-    "attachment; filename*=iso-8859-1''{}",
-    'attachment; filename*={}',
-])
-@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)]))
-def test_parse_content_disposition(caplog, template, stubs, s):
-    """Test parsing headers based on templates which hypothesis completes."""
-    header = template.format(s)
-    reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header})
-    with caplog.at_level(logging.ERROR, 'rfc6266'):
-        http.parse_content_disposition(reply)
-
-
-@hypothesis.given(strategies.binary())
-def test_content_disposition_directly(s):
-    """Test rfc6266 parsing directly with binary data."""
-    try:
-        cd = rfc6266.ContentDisposition.parse(s)
-        cd.filename()
-    except (SyntaxError, UnicodeDecodeError, rfc6266.Error):
-        pass
-
-
-@hypothesis.given(strategies.text())
-def test_parse_content_type(stubs, s):
-    reply = stubs.FakeNetworkReply(headers={'Content-Type': s})
-    http.parse_content_type(reply)
author	Florian Bruhin <me@the-compiler.org>	2021-01-17 16:43:19 +0100
committer	Florian Bruhin <me@the-compiler.org>	2021-01-17 17:02:07 +0100
commit	a3c0a8759ac336ba58c35ba75e0d42404f331446 (patch)
tree	4c6252197d702cd448469bb58fe30de120a926e9
parent	3c905f21fd2d6b1074f45bf8b2ad37a4455c447a (diff)
download	qutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.tar.gz qutebrowser-a3c0a8759ac336ba58c35ba75e0d42404f331446.zip