1 files changed, 112 insertions, 7 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index c2860eea1..b96614d0a 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -19,13 +19,118 @@
 
 """Parsing functions for various HTTP headers."""
 
-
+import email.headerregistry
+import email.errors
+import dataclasses
 import os.path
+from typing import Type
 
 from PyQt5.QtNetwork import QNetworkRequest
 
-from qutebrowser.utils import log
-from qutebrowser.browser.webkit import rfc6266
+from qutebrowser.utils import log, utils
+
+
+class ContentDispositionError(Exception):
+
+    """Base class for RFC6266 errors."""
+
+
+@dataclasses.dataclass
+class DefectWrapper:
+
+    """Wrapper around an email.errors defect for comparison."""
+
+    error_class: Type[email.errors.MessageDefect]
+    line: str
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, self.error_class)
+            and other.line == self.line  # type: ignore[attr-defined]
+        )
+
+
+class ContentDisposition:
+
+    """Records various indications and hints about content disposition.
+
+    These can be used to know if a file should be downloaded or
+    displayed directly, and to hint what filename it should have
+    in the download case.
+    """
+
+    # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix the
+    # attfnboth one, which has a slightly different wording ("duplicate(s) ignored"
+    # instead of "duplicate ignored"), because even if we did ignore that one, it
+    # still wouldn't work properly...
+    _IGNORED_DEFECT = DefectWrapper(
+        email.errors.InvalidHeaderDefect,  # type: ignore[attr-defined]
+        'duplicate parameter name; duplicate ignored'
+    )
+
+    def __init__(self, disposition, params):
+        """Used internally after parsing the header."""
+        self.disposition = disposition
+        self.params = params
+        assert 'filename*' not in self.params  # Handled by headerregistry
+
+    @classmethod
+    def parse(cls, value):
+        """Build a ContentDisposition from the raw header value."""
+        # We allow non-ascii here (it will only be parsed inside of qdtext, and
+        # rejected by the grammar if it appears in other places), although parsing
+        # it can be ambiguous.  Parsing it ensures that a non-ambiguous filename*
+        # value won't get dismissed because of an unrelated ambiguity in the
+        # filename parameter. But it does mean we occasionally give
+        # less-than-certain values for some legacy senders.
+        try:
+            decoded = value.decode('iso-8859-1')
+        except UnicodeDecodeError as e:
+            raise ContentDispositionError(e)
+
+        reg = email.headerregistry.HeaderRegistry()
+
+        try:
+            parsed = reg('Content-Disposition', decoded)
+        except IndexError:
+            # WORKAROUND for https://bugs.python.org/issue37491
+            # Fixed in Python 3.7.5 and 3.8.0.
+            raise ContentDispositionError("Missing closing quote character")
+
+        if parsed.defects:
+            defects = list(parsed.defects)
+            if defects != [cls._IGNORED_DEFECT]:  # type: ignore[comparison-overlap]
+                raise ContentDispositionError(defects)
+
+        assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+        return cls(disposition=parsed.content_disposition, params=parsed.params)
+
+    def filename(self):
+        """The filename from the Content-Disposition header or None.
+
+        On safety:
+
+        This value records the intent of the sender.
+
+        You shouldn't use this sender-controlled value as a filesystem path, it
+        can be insecure. Serving files with this filename can be dangerous as
+        well, due to a certain browser using the part after the dot for
+        mime-sniffing.  Saving it to a database is fine by itself though.
+        """
+        return self.params.get('filename')
+
+    def is_inline(self):
+        """Return whether the file should be handled inline.
+
+        If not, and unless your application supports other dispositions
+        than the standard inline and attachment, it should be handled
+        as an attachment.
+        """
+        return self.disposition in {None, 'inline'}
+
+    def __repr__(self):
+        return utils.get_repr(self, constructor=True,
+                              disposition=self.disposition, params=self.params)
 
 
 def parse_content_disposition(reply):
@@ -47,11 +152,11 @@ def parse_content_disposition(reply):
         # os.path.basename later.
         try:
             value = bytes(reply.rawHeader(content_disposition_header))
-            log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
-            content_disposition = rfc6266.ContentDisposition.parse(value)
+            log.network.debug("Parsing Content-Disposition: {value!r}")
+            content_disposition = ContentDisposition.parse(value)
             filename = content_disposition.filename()
-        except rfc6266.Error as e:
-            log.rfc6266.error(f"Error while parsing filename: {e}")
+        except ContentDispositionError as e:
+            log.network.error(f"Error while parsing filename: {e}")
         else:
             is_inline = content_disposition.is_inline()
     # Then try to get filename from url