From 3c905f21fd2d6b1074f45bf8b2ad37a4455c447a Mon Sep 17 00:00:00 2001
From: Florian Bruhin <me@the-compiler.org>
Date: Sun, 17 Jan 2021 16:35:52 +0100
Subject: rfc6266: Move parsing into class

---
 qutebrowser/browser/webkit/http.py                 |  2 +-
 qutebrowser/browser/webkit/rfc6266.py              | 83 +++++++++++-----------
 .../browser/webkit/http/test_http_hypothesis.py    |  4 +-
 3 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index 2bace9f3c..c2860eea1 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -48,7 +48,7 @@ def parse_content_disposition(reply):
         try:
             value = bytes(reply.rawHeader(content_disposition_header))
             log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
-            content_disposition = rfc6266.parse_headers(value)
+            content_disposition = rfc6266.ContentDisposition.parse(value)
             filename = content_disposition.filename()
         except rfc6266.Error as e:
             log.rfc6266.error(f"Error while parsing filename: {e}")
diff --git a/qutebrowser/browser/webkit/rfc6266.py b/qutebrowser/browser/webkit/rfc6266.py
index 234a1968f..cc5ba31e1 100644
--- a/qutebrowser/browser/webkit/rfc6266.py
+++ b/qutebrowser/browser/webkit/rfc6266.py
@@ -47,7 +47,7 @@ class DefectWrapper:
         )
 
 
-class _ContentDisposition:
+class ContentDisposition:
 
     """Records various indications and hints about content disposition.
 
@@ -56,12 +56,52 @@ class _ContentDisposition:
     in the download case.
     """
 
+    # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth
+    # one which has a slightly different wording ("duplicate(s) ignored" instead of
+    # "duplicate ignored"), because even if we did ignore that one, it still wouldn't
+    # work properly...
+    _IGNORED_DEFECT = DefectWrapper(
+        email.errors.InvalidHeaderDefect,  # type: ignore[attr-defined]
+        'duplicate parameter name; duplicate ignored'
+    )
+
     def __init__(self, disposition, params):
         """Used internally after parsing the header."""
         self.disposition = disposition
         self.params = params
         assert 'filename*' not in self.params  # Handled by headerregistry
 
+    @classmethod
+    def parse(cls, value):
+        """Build a _ContentDisposition from header values."""
+        # We allow non-ascii here (it will only be parsed inside of qdtext, and
+        # rejected by the grammar if it appears in other places), although parsing
+        # it can be ambiguous.  Parsing it ensures that a non-ambiguous filename*
+        # value won't get dismissed because of an unrelated ambiguity in the
+        # filename parameter. But it does mean we occasionally give
+        # less-than-certain values for some legacy senders.
+        try:
+           decoded = value.decode('iso-8859-1')
+        except UnicodeDecodeError as e:
+            raise Error(e)
+
+        reg = email.headerregistry.HeaderRegistry()
+
+        try:
+            parsed = reg('Content-Disposition', decoded)
+        except IndexError:
+            # WORKAROUND for https://bugs.python.org/issue37491
+            # Fixed in Python 3.7.5 and 3.8.0.
+            raise Error("Missing closing quote character")
+
+        if parsed.defects:
+            defects = list(parsed.defects)
+            if defects != [cls._IGNORED_DEFECT]:  # type: ignore[comparison-overlap]
+                raise Error(defects)
+
+        assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+        return cls(disposition=parsed.content_disposition, params=parsed.params)
+
     def filename(self):
         """The filename from the Content-Disposition header or None.
 
@@ -88,44 +128,3 @@ class _ContentDisposition:
     def __repr__(self):
         return utils.get_repr(self, constructor=True,
                               disposition=self.disposition, params=self.params)
-
-
-# Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth one
-# which has a slightly different wording ("duplicate(s) ignored" instead of "duplicate
-# ignored"), because even if we did ignore that one, it still wouldn't work properly...
-_IGNORED_DEFECT = DefectWrapper(
-    email.errors.InvalidHeaderDefect,  # type: ignore[attr-defined]
-    'duplicate parameter name; duplicate ignored'
-)
-
-
-def parse_headers(content_disposition):
-    """Build a _ContentDisposition from header values."""
-    # We allow non-ascii here (it will only be parsed inside of qdtext, and
-    # rejected by the grammar if it appears in other places), although parsing
-    # it can be ambiguous.  Parsing it ensures that a non-ambiguous filename*
-    # value won't get dismissed because of an unrelated ambiguity in the
-    # filename parameter. But it does mean we occasionally give
-    # less-than-certain values for some legacy senders.
-    try:
-        content_disposition = content_disposition.decode('iso-8859-1')
-    except UnicodeDecodeError as e:
-        raise Error(e)
-
-    reg = email.headerregistry.HeaderRegistry()
-
-    try:
-        parsed = reg('Content-Disposition', content_disposition)
-    except IndexError:
-        # WORKAROUND for https://bugs.python.org/issue37491
-        # Fixed in Python 3.7.5 and 3.8.0.
-        raise Error("Missing closing quote character")
-
-    if parsed.defects:
-        defects = list(parsed.defects)
-        if defects != [_IGNORED_DEFECT]:  # type: ignore[comparison-overlap]
-            raise Error(defects)
-
-    assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
-    return _ContentDisposition(disposition=parsed.content_disposition,
-                               params=parsed.params)
diff --git a/tests/unit/browser/webkit/http/test_http_hypothesis.py b/tests/unit/browser/webkit/http/test_http_hypothesis.py
index d4ae8055d..d46bfe897 100644
--- a/tests/unit/browser/webkit/http/test_http_hypothesis.py
+++ b/tests/unit/browser/webkit/http/test_http_hypothesis.py
@@ -45,9 +45,9 @@ def test_parse_content_disposition(caplog, template, stubs, s):
 
 @hypothesis.given(strategies.binary())
 def test_content_disposition_directly(s):
-    """Test rfc6266.parse_headers directly with binary data."""
+    """Test rfc6266 parsing directly with binary data."""
     try:
-        cd = rfc6266.parse_headers(s)
+        cd = rfc6266.ContentDisposition.parse(s)
         cd.filename()
     except (SyntaxError, UnicodeDecodeError, rfc6266.Error):
         pass
-- 
cgit v1.2.3-54-g00ecf