summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlorian Bruhin <me@the-compiler.org>2021-01-17 16:35:52 +0100
committerFlorian Bruhin <me@the-compiler.org>2021-01-17 16:35:52 +0100
commit3c905f21fd2d6b1074f45bf8b2ad37a4455c447a (patch)
tree380f1deb72fd7564e114782057386bbce26e90ae
parentf9bb5f5e2118da3f8379429eed2bfa6c01f30d51 (diff)
downloadqutebrowser-3c905f21fd2d6b1074f45bf8b2ad37a4455c447a.tar.gz
qutebrowser-3c905f21fd2d6b1074f45bf8b2ad37a4455c447a.zip
rfc6266: Move parsing into class
-rw-r--r--qutebrowser/browser/webkit/http.py2
-rw-r--r--qutebrowser/browser/webkit/rfc6266.py83
-rw-r--r--tests/unit/browser/webkit/http/test_http_hypothesis.py4
3 files changed, 44 insertions, 45 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index 2bace9f3c..c2860eea1 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -48,7 +48,7 @@ def parse_content_disposition(reply):
try:
value = bytes(reply.rawHeader(content_disposition_header))
log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
- content_disposition = rfc6266.parse_headers(value)
+ content_disposition = rfc6266.ContentDisposition.parse(value)
filename = content_disposition.filename()
except rfc6266.Error as e:
log.rfc6266.error(f"Error while parsing filename: {e}")
diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py
index 234a1968f..cc5ba31e1 100644
--- a/qutebrowser/browser/webkit/rfc6266.py
+++ b/qutebrowser/browser/webkit/rfc6266.py
@@ -47,7 +47,7 @@ class DefectWrapper:
)
-class _ContentDisposition:
+class ContentDisposition:
"""Records various indications and hints about content disposition.
@@ -56,12 +56,52 @@ class _ContentDisposition:
in the download case.
"""
+ # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth
+ # one which has a slightly different wording ("duplicate(s) ignored" instead of
+ # "duplicate ignored"), because even if we did ignore that one, it still wouldn't
+ # work properly...
+ _IGNORED_DEFECT = DefectWrapper(
+ email.errors.InvalidHeaderDefect, # type: ignore[attr-defined]
+ 'duplicate parameter name; duplicate ignored'
+ )
+
def __init__(self, disposition, params):
"""Used internally after parsing the header."""
self.disposition = disposition
self.params = params
assert 'filename*' not in self.params # Handled by headerregistry
+ @classmethod
+ def parse(cls, value):
+ """Build a _ContentDisposition from header values."""
+ # We allow non-ascii here (it will only be parsed inside of qdtext, and
+ # rejected by the grammar if it appears in other places), although parsing
+ # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
+ # value won't get dismissed because of an unrelated ambiguity in the
+ # filename parameter. But it does mean we occasionally give
+ # less-than-certain values for some legacy senders.
+ try:
+ decoded = value.decode('iso-8859-1')
+ except UnicodeDecodeError as e:
+ raise Error(e)
+
+ reg = email.headerregistry.HeaderRegistry()
+
+ try:
+ parsed = reg('Content-Disposition', decoded)
+ except IndexError:
+ # WORKAROUND for https://bugs.python.org/issue37491
+ # Fixed in Python 3.7.5 and 3.8.0.
+ raise Error("Missing closing quote character")
+
+ if parsed.defects:
+ defects = list(parsed.defects)
+ if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap]
+ raise Error(defects)
+
+ assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+ return cls(disposition=parsed.content_disposition, params=parsed.params)
+
def filename(self):
"""The filename from the Content-Disposition header or None.
@@ -88,44 +128,3 @@ class _ContentDisposition:
def __repr__(self):
return utils.get_repr(self, constructor=True,
disposition=self.disposition, params=self.params)
-
-
-# Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth one
-# which has a slightly different wording ("duplicate(s) ignored" instead of "duplicate
-# ignored"), because even if we did ignore that one, it still wouldn't work properly...
-_IGNORED_DEFECT = DefectWrapper(
- email.errors.InvalidHeaderDefect, # type: ignore[attr-defined]
- 'duplicate parameter name; duplicate ignored'
-)
-
-
-def parse_headers(content_disposition):
- """Build a _ContentDisposition from header values."""
- # We allow non-ascii here (it will only be parsed inside of qdtext, and
- # rejected by the grammar if it appears in other places), although parsing
- # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
- # value won't get dismissed because of an unrelated ambiguity in the
- # filename parameter. But it does mean we occasionally give
- # less-than-certain values for some legacy senders.
- try:
- content_disposition = content_disposition.decode('iso-8859-1')
- except UnicodeDecodeError as e:
- raise Error(e)
-
- reg = email.headerregistry.HeaderRegistry()
-
- try:
- parsed = reg('Content-Disposition', content_disposition)
- except IndexError:
- # WORKAROUND for https://bugs.python.org/issue37491
- # Fixed in Python 3.7.5 and 3.8.0.
- raise Error("Missing closing quote character")
-
- if parsed.defects:
- defects = list(parsed.defects)
- if defects != [_IGNORED_DEFECT]: # type: ignore[comparison-overlap]
- raise Error(defects)
-
- assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
- return _ContentDisposition(disposition=parsed.content_disposition,
- params=parsed.params)
diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py
index d4ae8055d..d46bfe897 100644
--- a/tests/unit/browser/webkit/http/test_http_hypothesis.py
+++ b/tests/unit/browser/webkit/http/test_http_hypothesis.py
@@ -45,9 +45,9 @@ def test_parse_content_disposition(caplog, template, stubs, s):
@hypothesis.given(strategies.binary())
def test_content_disposition_directly(s):
- """Test rfc6266.parse_headers directly with binary data."""
+ """Test rfc6266 parsing directly with binary data."""
try:
- cd = rfc6266.parse_headers(s)
+ cd = rfc6266.ContentDisposition.parse(s)
cd.filename()
except (SyntaxError, UnicodeDecodeError, rfc6266.Error):
pass