From a3c0a8759ac336ba58c35ba75e0d42404f331446 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Sun, 17 Jan 2021 16:43:19 +0100 Subject: rfc6266: Consolidate files --- qutebrowser/browser/webkit/http.py | 119 +++++++++++++++++-- qutebrowser/browser/webkit/rfc6266.py | 130 --------------------- qutebrowser/utils/log.py | 3 +- scripts/dev/check_coverage.py | 2 - .../webkit/http/test_content_disposition.py | 3 +- tests/unit/browser/webkit/http/test_http.py | 41 ++++++- .../browser/webkit/http/test_http_hypothesis.py | 59 ---------- 7 files changed, 151 insertions(+), 206 deletions(-) delete mode 100644 qutebrowser/browser/webkit/rfc6266.py delete mode 100644 tests/unit/browser/webkit/http/test_http_hypothesis.py diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py index c2860eea1..b96614d0a 100644 --- a/qutebrowser/browser/webkit/http.py +++ b/qutebrowser/browser/webkit/http.py @@ -19,13 +19,118 @@ """Parsing functions for various HTTP headers.""" - +import email.headerregistry +import email.errors +import dataclasses import os.path +from typing import Type from PyQt5.QtNetwork import QNetworkRequest -from qutebrowser.utils import log -from qutebrowser.browser.webkit import rfc6266 +from qutebrowser.utils import log, utils + + +class ContentDispositionError(Exception): + + """Base class for RFC6266 errors.""" + + +@dataclasses.dataclass +class DefectWrapper: + + """Wrapper around a email.error for comparison.""" + + error_class: Type[email.errors.MessageDefect] + line: str + + def __eq__(self, other): + return ( + isinstance(other, self.error_class) + and other.line == self.line # type: ignore[attr-defined] + ) + + +class ContentDisposition: + + """Records various indications and hints about content disposition. + + These can be used to know if a file should be downloaded or + displayed directly, and to hint what filename it should have + in the download case. + """ + + # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth + # one which has a slightly different wording ("duplicate(s) ignored" instead of + # "duplicate ignored"), because even if we did ignore that one, it still wouldn't + # work properly... + _IGNORED_DEFECT = DefectWrapper( + email.errors.InvalidHeaderDefect, # type: ignore[attr-defined] + 'duplicate parameter name; duplicate ignored' + ) + + def __init__(self, disposition, params): + """Used internally after parsing the header.""" + self.disposition = disposition + self.params = params + assert 'filename*' not in self.params # Handled by headerregistry + + @classmethod + def parse(cls, value): + """Build a _ContentDisposition from header values.""" + # We allow non-ascii here (it will only be parsed inside of qdtext, and + # rejected by the grammar if it appears in other places), although parsing + # it can be ambiguous. Parsing it ensures that a non-ambiguous filename* + # value won't get dismissed because of an unrelated ambiguity in the + # filename parameter. But it does mean we occasionally give + # less-than-certain values for some legacy senders. + try: + decoded = value.decode('iso-8859-1') + except UnicodeDecodeError as e: + raise ContentDispositionError(e) + + reg = email.headerregistry.HeaderRegistry() + + try: + parsed = reg('Content-Disposition', decoded) + except IndexError: + # WORKAROUND for https://bugs.python.org/issue37491 + # Fixed in Python 3.7.5 and 3.8.0. + raise ContentDispositionError("Missing closing quote character") + + if parsed.defects: + defects = list(parsed.defects) + if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap] + raise ContentDispositionError(defects) + + assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed + return cls(disposition=parsed.content_disposition, params=parsed.params) + + def filename(self): + """The filename from the Content-Disposition header or None. + + On safety: + + This property records the intent of the sender. + + You shouldn't use this sender-controlled value as a filesystem path, it + can be insecure. Serving files with this filename can be dangerous as + well, due to a certain browser using the part after the dot for + mime-sniffing. Saving it to a database is fine by itself though. + """ + return self.params.get('filename') + + def is_inline(self): + """Return if the file should be handled inline. + + If not, and unless your application supports other dispositions + than the standard inline and attachment, it should be handled + as an attachment. + """ + return self.disposition in {None, 'inline'} + + def __repr__(self): + return utils.get_repr(self, constructor=True, + disposition=self.disposition, params=self.params) def parse_content_disposition(reply): @@ -47,11 +152,11 @@ def parse_content_disposition(reply): # os.path.basename later. try: value = bytes(reply.rawHeader(content_disposition_header)) - log.rfc6266.debug("Parsing Content-Disposition: {value!r}") - content_disposition = rfc6266.ContentDisposition.parse(value) + log.network.debug("Parsing Content-Disposition: {value!r}") + content_disposition = ContentDisposition.parse(value) filename = content_disposition.filename() - except rfc6266.Error as e: - log.rfc6266.error(f"Error while parsing filename: {e}") + except ContentDispositionError as e: + log.network.error(f"Error while parsing filename: {e}") else: is_inline = content_disposition.is_inline() # Then try to get filename from url diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py deleted file mode 100644 index cc5ba31e1..000000000 --- a/qutebrowser/browser/webkit/rfc6266.py +++ /dev/null @@ -1,130 +0,0 @@ -# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: - -# Copyright 2014-2020 Florian Bruhin (The Compiler) -# -# This file is part of qutebrowser. -# -# qutebrowser is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# qutebrowser is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with qutebrowser. If not, see . - -"""Parsing for the RFC 6266 (Content-Disposition) header.""" - -import email.headerregistry -import email.errors -import dataclasses -from typing import Type - -from qutebrowser.utils import utils - - -class Error(Exception): - - """Base class for RFC6266 errors.""" - - -@dataclasses.dataclass -class DefectWrapper: - - """Wrapper around a email.error for comparison.""" - - error_class: Type[email.errors.MessageDefect] - line: str - - def __eq__(self, other): - return ( - isinstance(other, self.error_class) - and other.line == self.line # type: ignore[attr-defined] - ) - - -class ContentDisposition: - - """Records various indications and hints about content disposition. - - These can be used to know if a file should be downloaded or - displayed directly, and to hint what filename it should have - in the download case. - """ - - # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth - # one which has a slightly different wording ("duplicate(s) ignored" instead of - # "duplicate ignored"), because even if we did ignore that one, it still wouldn't - # work properly... - _IGNORED_DEFECT = DefectWrapper( - email.errors.InvalidHeaderDefect, # type: ignore[attr-defined] - 'duplicate parameter name; duplicate ignored' - ) - - def __init__(self, disposition, params): - """Used internally after parsing the header.""" - self.disposition = disposition - self.params = params - assert 'filename*' not in self.params # Handled by headerregistry - - @classmethod - def parse(cls, value): - """Build a _ContentDisposition from header values.""" - # We allow non-ascii here (it will only be parsed inside of qdtext, and - # rejected by the grammar if it appears in other places), although parsing - # it can be ambiguous. Parsing it ensures that a non-ambiguous filename* - # value won't get dismissed because of an unrelated ambiguity in the - # filename parameter. But it does mean we occasionally give - # less-than-certain values for some legacy senders. - try: - decoded = value.decode('iso-8859-1') - except UnicodeDecodeError as e: - raise Error(e) - - reg = email.headerregistry.HeaderRegistry() - - try: - parsed = reg('Content-Disposition', decoded) - except IndexError: - # WORKAROUND for https://bugs.python.org/issue37491 - # Fixed in Python 3.7.5 and 3.8.0. - raise Error("Missing closing quote character") - - if parsed.defects: - defects = list(parsed.defects) - if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap] - raise Error(defects) - - assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed - return cls(disposition=parsed.content_disposition, params=parsed.params) - - def filename(self): - """The filename from the Content-Disposition header or None. - - On safety: - - This property records the intent of the sender. - - You shouldn't use this sender-controlled value as a filesystem path, it - can be insecure. Serving files with this filename can be dangerous as - well, due to a certain browser using the part after the dot for - mime-sniffing. Saving it to a database is fine by itself though. - """ - return self.params.get('filename') - - def is_inline(self): - """Return if the file should be handled inline. - - If not, and unless your application supports other dispositions - than the standard inline and attachment, it should be handled - as an attachment. - """ - return self.disposition in {None, 'inline'} - - def __repr__(self): - return utils.get_repr(self, constructor=True, - disposition=self.disposition, params=self.params) diff --git a/qutebrowser/utils/log.py b/qutebrowser/utils/log.py index fa6d9beaf..338b370dc 100644 --- a/qutebrowser/utils/log.py +++ b/qutebrowser/utils/log.py @@ -134,7 +134,6 @@ keyboard = logging.getLogger('keyboard') downloads = logging.getLogger('downloads') js = logging.getLogger('js') # Javascript console messages qt = logging.getLogger('qt') # Warnings produced by Qt -rfc6266 = logging.getLogger('rfc6266') ipc = logging.getLogger('ipc') shlexer = logging.getLogger('shlexer') save = logging.getLogger('save') @@ -153,7 +152,7 @@ LOGGER_NAMES = [ 'destroy', 'modes', 'webview', 'misc', 'mouse', 'procs', 'hints', 'keyboard', 'commands', 'signals', 'downloads', - 'js', 'qt', 'rfc6266', 'ipc', 'shlexer', + 'js', 'qt', 'ipc', 'shlexer', 'save', 'message', 'config', 'sessions', 'webelem', 'prompt', 'network', 'sql', 'greasemonkey', 'extensions', diff --git a/scripts/dev/check_coverage.py b/scripts/dev/check_coverage.py index 416628c45..1c9bf7c8e 100644 --- a/scripts/dev/check_coverage.py +++ b/scripts/dev/check_coverage.py @@ -90,8 +90,6 @@ PERFECT_FILES = [ 'qutebrowser/browser/pdfjs.py'), ('tests/unit/browser/webkit/http/test_http.py', 'qutebrowser/browser/webkit/http.py'), - ('tests/unit/browser/webkit/http/test_content_disposition.py', - 'qutebrowser/browser/webkit/rfc6266.py'), # ('tests/unit/browser/webkit/test_webkitelem.py', # 'qutebrowser/browser/webkit/webkitelem.py'), # ('tests/unit/browser/webkit/test_webkitelem.py', diff --git a/tests/unit/browser/webkit/http/test_content_disposition.py b/tests/unit/browser/webkit/http/test_content_disposition.py index 15d4f3e54..4957c960a 100644 --- a/tests/unit/browser/webkit/http/test_content_disposition.py +++ b/tests/unit/browser/webkit/http/test_content_disposition.py @@ -54,8 +54,7 @@ class HeaderChecker: """Check if the passed header is ignored.""" reply = self.stubs.FakeNetworkReply( headers={'Content-Disposition': header}) - with self.caplog.at_level(logging.ERROR, 'rfc6266'): - # with self.assertLogs(log.rfc6266, logging.ERROR): + with self.caplog.at_level(logging.ERROR, 'network'): cd_inline, cd_filename = http.parse_content_disposition(reply) assert cd_filename == DEFAULT_NAME assert cd_inline diff --git a/tests/unit/browser/webkit/http/test_http.py b/tests/unit/browser/webkit/http/test_http.py index 4da7c3986..ce1ae9419 100644 --- a/tests/unit/browser/webkit/http/test_http.py +++ b/tests/unit/browser/webkit/http/test_http.py @@ -17,13 +17,13 @@ # You should have received a copy of the GNU General Public License # along with qutebrowser. If not, see . -"""Tests for qutebrowser.browser.webkit.http. +"""Tests for qutebrowser.browser.webkit.http.""" -Note that tests for parse_content_disposition are in their own -test_content_disposition.py file. -""" +import logging import pytest +import hypothesis +from hypothesis import strategies from PyQt5.QtCore import QUrl from qutebrowser.browser.webkit import http @@ -44,6 +44,33 @@ def test_no_content_disposition(stubs, url, expected): assert filename == expected +@pytest.mark.parametrize('template', [ + '{}', + 'attachment; filename="{}"', + 'inline; {}', + 'attachment; {}="foo"', + "attachment; filename*=iso-8859-1''{}", + 'attachment; filename*={}', +]) +@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)])) +def test_parse_content_disposition_hypothesis(caplog, template, stubs, s): + """Test parsing headers based on templates which hypothesis completes.""" + header = template.format(s) + reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header}) + with caplog.at_level(logging.ERROR, 'network'): + http.parse_content_disposition(reply) + + +@hypothesis.given(strategies.binary()) +def test_content_disposition_directly_hypothesis(s): + """Test rfc6266 parsing directly with binary data.""" + try: + cd = http.ContentDisposition.parse(s) + cd.filename() + except (SyntaxError, UnicodeDecodeError, http.ContentDispositionError): + pass + + @pytest.mark.parametrize('content_type, expected_mimetype, expected_rest', [ (None, None, None), ('image/example', 'image/example', None), @@ -59,3 +86,9 @@ def test_parse_content_type(stubs, content_type, expected_mimetype, mimetype, rest = http.parse_content_type(reply) assert mimetype == expected_mimetype assert rest == expected_rest + + +@hypothesis.given(strategies.text()) +def test_parse_content_type_hypothesis(stubs, s): + reply = stubs.FakeNetworkReply(headers={'Content-Type': s}) + http.parse_content_type(reply) diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py deleted file mode 100644 index d46bfe897..000000000 --- a/tests/unit/browser/webkit/http/test_http_hypothesis.py +++ /dev/null @@ -1,59 +0,0 @@ -# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: - -# Copyright 2015-2020 Florian Bruhin (The Compiler) -# -# This file is part of qutebrowser. -# -# qutebrowser is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# qutebrowser is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with qutebrowser. If not, see . - -import logging - -import pytest -import hypothesis -from hypothesis import strategies - -from qutebrowser.browser.webkit import http, rfc6266 - - -@pytest.mark.parametrize('template', [ - '{}', - 'attachment; filename="{}"', - 'inline; {}', - 'attachment; {}="foo"', - "attachment; filename*=iso-8859-1''{}", - 'attachment; filename*={}', -]) -@hypothesis.given(strategies.text(alphabet=[chr(x) for x in range(255)])) -def test_parse_content_disposition(caplog, template, stubs, s): - """Test parsing headers based on templates which hypothesis completes.""" - header = template.format(s) - reply = stubs.FakeNetworkReply(headers={'Content-Disposition': header}) - with caplog.at_level(logging.ERROR, 'rfc6266'): - http.parse_content_disposition(reply) - - -@hypothesis.given(strategies.binary()) -def test_content_disposition_directly(s): - """Test rfc6266 parsing directly with binary data.""" - try: - cd = rfc6266.ContentDisposition.parse(s) - cd.filename() - except (SyntaxError, UnicodeDecodeError, rfc6266.Error): - pass - - -@hypothesis.given(strategies.text()) -def test_parse_content_type(stubs, s): - reply = stubs.FakeNetworkReply(headers={'Content-Type': s}) - http.parse_content_type(reply) -- cgit v1.2.3-54-g00ecf