From 3c905f21fd2d6b1074f45bf8b2ad37a4455c447a Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Sun, 17 Jan 2021 16:35:52 +0100 Subject: rfc6266: Move parsing into class --- qutebrowser/browser/webkit/http.py | 2 +- qutebrowser/browser/webkit/rfc6266.py | 83 +++++++++++----------- .../browser/webkit/http/test_http_hypothesis.py | 4 +- 3 files changed, 44 insertions(+), 45 deletions(-) diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py index 2bace9f3c..c2860eea1 100644 --- a/qutebrowser/browser/webkit/http.py +++ b/qutebrowser/browser/webkit/http.py @@ -48,7 +48,7 @@ def parse_content_disposition(reply): try: value = bytes(reply.rawHeader(content_disposition_header)) log.rfc6266.debug("Parsing Content-Disposition: {value!r}") - content_disposition = rfc6266.parse_headers(value) + content_disposition = rfc6266.ContentDisposition.parse(value) filename = content_disposition.filename() except rfc6266.Error as e: log.rfc6266.error(f"Error while parsing filename: {e}") diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py index 234a1968f..cc5ba31e1 100644 --- a/qutebrowser/browser/webkit/rfc6266.py +++ b/qutebrowser/browser/webkit/rfc6266.py @@ -47,7 +47,7 @@ class DefectWrapper: ) -class _ContentDisposition: +class ContentDisposition: """Records various indications and hints about content disposition. @@ -56,12 +56,52 @@ class _ContentDisposition: in the download case. """ + # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth + # one which has a slightly different wording ("duplicate(s) ignored" instead of + # "duplicate ignored"), because even if we did ignore that one, it still wouldn't + # work properly... + _IGNORED_DEFECT = DefectWrapper( + email.errors.InvalidHeaderDefect, # type: ignore[attr-defined] + 'duplicate parameter name; duplicate ignored' + ) + def __init__(self, disposition, params): """Used internally after parsing the header.""" self.disposition = disposition self.params = params assert 'filename*' not in self.params # Handled by headerregistry + @classmethod + def parse(cls, value): + """Build a _ContentDisposition from header values.""" + # We allow non-ascii here (it will only be parsed inside of qdtext, and + # rejected by the grammar if it appears in other places), although parsing + # it can be ambiguous. Parsing it ensures that a non-ambiguous filename* + # value won't get dismissed because of an unrelated ambiguity in the + # filename parameter. But it does mean we occasionally give + # less-than-certain values for some legacy senders. + try: + decoded = value.decode('iso-8859-1') + except UnicodeDecodeError as e: + raise Error(e) + + reg = email.headerregistry.HeaderRegistry() + + try: + parsed = reg('Content-Disposition', decoded) + except IndexError: + # WORKAROUND for https://bugs.python.org/issue37491 + # Fixed in Python 3.7.5 and 3.8.0. + raise Error("Missing closing quote character") + + if parsed.defects: + defects = list(parsed.defects) + if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap] + raise Error(defects) + + assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed + return cls(disposition=parsed.content_disposition, params=parsed.params) + def filename(self): """The filename from the Content-Disposition header or None. @@ -88,44 +128,3 @@ class _ContentDisposition: def __repr__(self): return utils.get_repr(self, constructor=True, disposition=self.disposition, params=self.params) - - -# Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth one -# which has a slightly different wording ("duplicate(s) ignored" instead of "duplicate -# ignored"), because even if we did ignore that one, it still wouldn't work properly... -_IGNORED_DEFECT = DefectWrapper( - email.errors.InvalidHeaderDefect, # type: ignore[attr-defined] - 'duplicate parameter name; duplicate ignored' -) - - -def parse_headers(content_disposition): - """Build a _ContentDisposition from header values.""" - # We allow non-ascii here (it will only be parsed inside of qdtext, and - # rejected by the grammar if it appears in other places), although parsing - # it can be ambiguous. Parsing it ensures that a non-ambiguous filename* - # value won't get dismissed because of an unrelated ambiguity in the - # filename parameter. But it does mean we occasionally give - # less-than-certain values for some legacy senders. - try: - content_disposition = content_disposition.decode('iso-8859-1') - except UnicodeDecodeError as e: - raise Error(e) - - reg = email.headerregistry.HeaderRegistry() - - try: - parsed = reg('Content-Disposition', content_disposition) - except IndexError: - # WORKAROUND for https://bugs.python.org/issue37491 - # Fixed in Python 3.7.5 and 3.8.0. - raise Error("Missing closing quote character") - - if parsed.defects: - defects = list(parsed.defects) - if defects != [_IGNORED_DEFECT]: # type: ignore[comparison-overlap] - raise Error(defects) - - assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed - return _ContentDisposition(disposition=parsed.content_disposition, - params=parsed.params) diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py index d4ae8055d..d46bfe897 100644 --- a/tests/unit/browser/webkit/http/test_http_hypothesis.py +++ b/tests/unit/browser/webkit/http/test_http_hypothesis.py @@ -45,9 +45,9 @@ def test_parse_content_disposition(caplog, template, stubs, s): @hypothesis.given(strategies.binary()) def test_content_disposition_directly(s): - """Test rfc6266.parse_headers directly with binary data.""" + """Test rfc6266 parsing directly with binary data.""" try: - cd = rfc6266.parse_headers(s) + cd = rfc6266.ContentDisposition.parse(s) cd.filename() except (SyntaxError, UnicodeDecodeError, rfc6266.Error): pass -- cgit v1.2.3-54-g00ecf