diff options
Diffstat (limited to 'qutebrowser/browser/webkit/http.py')
-rw-r--r-- | qutebrowser/browser/webkit/http.py | 119 |
1 files changed, 112 insertions, 7 deletions
diff --git a/qutebrowser/browser/webkit/http.py b/qutebrowser/browser/webkit/http.py index c2860eea1..b96614d0a 100644 --- a/qutebrowser/browser/webkit/http.py +++ b/qutebrowser/browser/webkit/http.py @@ -19,13 +19,118 @@ """Parsing functions for various HTTP headers.""" - +import email.headerregistry +import email.errors +import dataclasses import os.path +from typing import Type from PyQt5.QtNetwork import QNetworkRequest -from qutebrowser.utils import log -from qutebrowser.browser.webkit import rfc6266 +from qutebrowser.utils import log, utils + + +class ContentDispositionError(Exception): + + """Base class for RFC6266 errors.""" + + +@dataclasses.dataclass +class DefectWrapper: + + """Wrapper around an email.errors defect for comparison.""" + + error_class: Type[email.errors.MessageDefect] + line: str + + def __eq__(self, other): + return ( + isinstance(other, self.error_class) + and other.line == self.line # type: ignore[attr-defined] + ) + + +class ContentDisposition: + + """Records various indications and hints about content disposition. + + These can be used to know if a file should be downloaded or + displayed directly, and to hint what filename it should have + in the download case. + """ + + # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix attfnboth + # one which has a slightly different wording ("duplicate(s) ignored" instead of + # "duplicate ignored"), because even if we did ignore that one, it still wouldn't + # work properly... 
+ _IGNORED_DEFECT = DefectWrapper( + email.errors.InvalidHeaderDefect, # type: ignore[attr-defined] + 'duplicate parameter name; duplicate ignored' + ) + + def __init__(self, disposition, params): + """Used internally after parsing the header.""" + self.disposition = disposition + self.params = params + assert 'filename*' not in self.params # Handled by headerregistry + + @classmethod + def parse(cls, value): + """Build a ContentDisposition from header values.""" + # We allow non-ascii here (it will only be parsed inside of qdtext, and + # rejected by the grammar if it appears in other places), although parsing + # it can be ambiguous. Parsing it ensures that a non-ambiguous filename* + # value won't get dismissed because of an unrelated ambiguity in the + # filename parameter. But it does mean we occasionally give + # less-than-certain values for some legacy senders. + try: + decoded = value.decode('iso-8859-1') + except UnicodeDecodeError as e: + raise ContentDispositionError(e) + + reg = email.headerregistry.HeaderRegistry() + + try: + parsed = reg('Content-Disposition', decoded) + except IndexError: + # WORKAROUND for https://bugs.python.org/issue37491 + # Fixed in Python 3.7.5 and 3.8.0. + raise ContentDispositionError("Missing closing quote character") + + if parsed.defects: + defects = list(parsed.defects) + if defects != [cls._IGNORED_DEFECT]: # type: ignore[comparison-overlap] + raise ContentDispositionError(defects) + + assert isinstance(parsed, email.headerregistry.ContentDispositionHeader), parsed + return cls(disposition=parsed.content_disposition, params=parsed.params) + + def filename(self): + """The filename from the Content-Disposition header or None. + + On safety: + + This value records the intent of the sender. + + You shouldn't use this sender-controlled value as a filesystem path, it + can be insecure. Serving files with this filename can be dangerous as + well, due to a certain browser using the part after the dot for + mime-sniffing. 
Saving it to a database is fine by itself though. + """ + return self.params.get('filename') + + def is_inline(self): + """Return if the file should be handled inline. + + If not, and unless your application supports other dispositions + than the standard inline and attachment, it should be handled + as an attachment. + """ + return self.disposition in {None, 'inline'} + + def __repr__(self): + return utils.get_repr(self, constructor=True, + disposition=self.disposition, params=self.params) def parse_content_disposition(reply): @@ -47,11 +152,11 @@ def parse_content_disposition(reply): # os.path.basename later. try: value = bytes(reply.rawHeader(content_disposition_header)) - log.rfc6266.debug("Parsing Content-Disposition: {value!r}") - content_disposition = rfc6266.ContentDisposition.parse(value) + log.network.debug("Parsing Content-Disposition: {value!r}") + content_disposition = ContentDisposition.parse(value) filename = content_disposition.filename() - except rfc6266.Error as e: - log.rfc6266.error(f"Error while parsing filename: {e}") + except ContentDispositionError as e: + log.network.error(f"Error while parsing filename: {e}") else: is_inline = content_disposition.is_inline() # Then try to get filename from url |