summary refs log tree commit diff
path: root/qutebrowser/browser/webkit/http.py
diff options
context:
space:
mode:
Diffstat (limited to 'qutebrowser/browser/webkit/http.py')
-rw-r--r-- qutebrowser/browser/webkit/http.py 119
1 files changed, 112 insertions, 7 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py
index c2860eea1..b96614d0a 100644
--- a/qutebrowser/browser/webkit/http.py
+++ b/qutebrowser/browser/webkit/http.py
@@ -19,13 +19,118 @@
"""Parsing functions for various HTTP headers."""
-
+import email.headerregistry
+import email.errors
+import dataclasses
import os.path
+from typing import Type
from PyQt5.QtNetwork import QNetworkRequest
-from qutebrowser.utils import log
-from qutebrowser.browser.webkit import rfc6266
+from qutebrowser.utils import log, utils
+
+
+class ContentDispositionError(Exception):
+
+ """Base class for RFC6266 errors."""
+
+
+@dataclasses.dataclass
+class DefectWrapper:
+
+ """Wrapper around an email.errors defect for comparison."""
+
+ error_class: Type[email.errors.MessageDefect]
+ line: str
+
+ def __eq__(self, other):
+ return (
+ isinstance(other, self.error_class)
+ and other.line == self.line # type: ignore[attr-defined]
+ )
+
+
+class ContentDisposition:
+
+ """Records various indications and hints about content disposition.
+
+ These can be used to know if a file should be downloaded or
+ displayed directly, and to hint what filename it should have
+ in the download case.
+ """
+
+ # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix the
+ # attfnboth one, which has a slightly different wording ("duplicate(s) ignored"
+ # instead of "duplicate ignored"), because even if we did ignore that one, it
+ # still wouldn't work properly...
+ _IGNORED_DEFECT = DefectWrapper(
+ email.errors.InvalidHeaderDefect, # type: ignore[attr-defined]
+ 'duplicate parameter name; duplicate ignored'
+ )
+
+ def __init__(self, disposition, params):
+ """Used internally after parsing the header."""
+ self.disposition = disposition
+ self.params = params
+ assert 'filename*' not in self.params # Handled by headerregistry
+
+ @classmethod
+ def parse(cls, value):
+ """Build a ContentDisposition from a raw header value."""
+ # We allow non-ascii here (it will only be parsed inside of qdtext, and
+ # rejected by the grammar if it appears in other places), although parsing
+ # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
+ # value won't get dismissed because of an unrelated ambiguity in the
+ # filename parameter. But it does mean we occasionally give
+ # less-than-certain values for some legacy senders.
+ try:
+ decoded = value.decode('iso-8859-1')
+ except UnicodeDecodeError as e:
+ raise ContentDispositionError(e)
+
+ reg = email.headerregistry.HeaderRegistry()
+
+ try:
+ parsed = reg('Content-Disposition', decoded)
+ except IndexError:
+ # WORKAROUND for https://bugs.python.org/issue37491
+ # Fixed in Python 3.7.5 and 3.8.0.
+ raise ContentDispositionError("Missing closing quote character")
+
+ if parsed.defects:
+ defects = list(parsed.defects)
+ if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap]
+ raise ContentDispositionError(defects)
+
+ assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed
+ return cls(disposition=parsed.content_disposition, params=parsed.params)
+
+ def filename(self):
+ """The filename from the Content-Disposition header or None.
+
+ On safety:
+
+ This value records the intent of the sender.
+
+ You shouldn't use this sender-controlled value as a filesystem path, it
+ can be insecure. Serving files with this filename can be dangerous as
+ well, due to a certain browser using the part after the dot for
+ mime-sniffing. Saving it to a database is fine by itself though.
+ """
+ return self.params.get('filename')
+
+ def is_inline(self):
+ """Return whether the file should be handled inline.
+
+ If not, and unless your application supports other dispositions
+ than the standard inline and attachment, it should be handled
+ as an attachment.
+ """
+ return self.disposition in {None, 'inline'}
+
+ def __repr__(self):
+ return utils.get_repr(self, constructor=True,
+ disposition=self.disposition, params=self.params)
def parse_content_disposition(reply):
@@ -47,11 +152,11 @@ def parse_content_disposition(reply):
# os.path.basename later.
try:
value = bytes(reply.rawHeader(content_disposition_header))
- log.rfc6266.debug("Parsing Content-Disposition: {value!r}")
- content_disposition = rfc6266.ContentDisposition.parse(value)
+ log.network.debug("Parsing Content-Disposition: {value!r}")
+ content_disposition = ContentDisposition.parse(value)
filename = content_disposition.filename()
- except rfc6266.Error as e:
- log.rfc6266.error(f"Error while parsing filename: {e}")
+ except ContentDispositionError as e:
+ log.network.error(f"Error while parsing filename: {e}")
else:
is_inline = content_disposition.is_inline()
# Then try to get filename from url