1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
# SPDX-FileCopyrightText: Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Parsing functions for various HTTP headers."""
import email.headerregistry
import email.errors
import dataclasses
import os.path
from typing import Type
from qutebrowser.qt.network import QNetworkRequest
from qutebrowser.utils import log, utils
class ContentDispositionError(Exception):

    """Error raised for Content-Disposition (RFC 6266) parsing failures."""
@dataclasses.dataclass
class DefectWrapper:

    """Comparison helper which matches an email defect by class and line.

    An instance compares equal to any object which is an instance of
    ``error_class`` and whose ``line`` attribute equals ``line``.
    """

    # The defect class the compared object has to be an instance of.
    error_class: Type[email.errors.MessageDefect]
    # The value the compared object's ``line`` attribute has to have.
    line: str

    def __eq__(self, other):
        # Objects of an unrelated type never match, whatever their line is.
        if not isinstance(other, self.error_class):
            return False
        defect_line = other.line  # type: ignore[attr-defined]
        return defect_line == self.line
class ContentDisposition:

    """Records various indications and hints about content disposition.

    These can be used to know if a file should be downloaded or
    displayed directly, and to hint what filename it should have
    in the download case.

    Attributes:
        disposition: The disposition type taken from the parsed header.
        params: The parameters taken from the parsed header.
    """

    # Ignoring this defect fixes the attfnboth2 test case. It does *not* fix
    # the attfnboth one, which has a slightly different wording
    # ("duplicate(s) ignored" instead of "duplicate ignored"), because even if
    # we did ignore that one, it still wouldn't work properly...
    _IGNORED_DEFECT = DefectWrapper(
        email.errors.InvalidHeaderDefect,
        'duplicate parameter name; duplicate ignored'
    )

    def __init__(self, disposition, params):
        """Used internally after parsing the header."""
        self.disposition = disposition
        self.params = params
        assert 'filename*' not in self.params  # Handled by headerregistry

    @classmethod
    def parse(cls, value):
        """Build a ContentDisposition from a raw header value.

        Args:
            value: The header value as bytes.

        Return:
            A new ContentDisposition object.

        Raises:
            ContentDispositionError: If the header could not be parsed, or if
                                     parsing reported defects other than the
                                     single ignored one.
        """
        # We allow non-ascii here (it will only be parsed inside of qdtext, and
        # rejected by the grammar if it appears in other places), although parsing
        # it can be ambiguous. Parsing it ensures that a non-ambiguous filename*
        # value won't get dismissed because of an unrelated ambiguity in the
        # filename parameter. But it does mean we occasionally give
        # less-than-certain values for some legacy senders.
        decoded = value.decode('iso-8859-1')

        reg = email.headerregistry.HeaderRegistry()
        # The original exception is chained as the cause in each handler, so
        # the underlying stdlib failure stays visible when debugging.
        try:
            parsed = reg('Content-Disposition', decoded)
        except IndexError as e:  # pragma: no cover
            # WORKAROUND for https://github.com/python/cpython/issues/81672
            # Fixed in Python 3.7.5 and 3.8.0.
            # Still getting failures on 3.10 on CI though
            raise ContentDispositionError(
                "Missing closing quote character") from e
        except ValueError as e:
            # WORKAROUND for https://github.com/python/cpython/issues/87112
            raise ContentDispositionError("Non-ASCII digit") from e
        except AttributeError as e:  # pragma: no cover
            # WORKAROUND for https://github.com/python/cpython/issues/93010
            raise ContentDispositionError(
                "Section number has an invalid leading 0") from e

        if parsed.defects:
            defects = list(parsed.defects)
            # Exactly one defect, and only the known-harmless one, is okay.
            if defects != [cls._IGNORED_DEFECT]:
                raise ContentDispositionError(defects)

        # https://github.com/python/mypy/issues/12314
        assert isinstance(
            parsed,  # type: ignore[unreachable]
            email.headerregistry.ContentDispositionHeader,
        ), parsed
        return cls(  # type: ignore[unreachable]
            disposition=parsed.content_disposition,
            params=parsed.params,
        )

    def filename(self):
        """The filename from the Content-Disposition header or None.

        On safety:

        This method returns the intent of the sender.

        You shouldn't use this sender-controlled value as a filesystem path, it
        can be insecure. Serving files with this filename can be dangerous as
        well, due to a certain browser using the part after the dot for
        mime-sniffing. Saving it to a database is fine by itself though.
        """
        return self.params.get('filename')

    def is_inline(self):
        """Return if the file should be handled inline.

        If not, and unless your application supports other dispositions
        than the standard inline and attachment, it should be handled
        as an attachment.
        """
        return self.disposition in {None, 'inline'}

    def __repr__(self):
        return utils.get_repr(self, constructor=True,
                              disposition=self.disposition, params=self.params)
def parse_content_disposition(reply):
    """Parse a content_disposition header.

    The filename is taken from the first of these which yields a non-empty
    name: the Content-Disposition header, the path of the reply's URL, and
    finally a fixed fallback name.

    Args:
        reply: The QNetworkReply to get a filename for.

    Return:
        A (is_inline, filename) tuple.
    """
    is_inline = True
    filename = None
    content_disposition_header = b'Content-Disposition'
    # First check if the Content-Disposition header has a filename
    # attribute.
    if reply.hasRawHeader(content_disposition_header):
        # We use the unsafe variant of the filename as we sanitize it via
        # os.path.basename below.
        try:
            value = bytes(reply.rawHeader(content_disposition_header))
            log.network.debug(f"Parsing Content-Disposition: {value!r}")
            content_disposition = ContentDisposition.parse(value)
            filename = content_disposition.filename()
        except ContentDispositionError as e:
            log.network.error(f"Error while parsing filename: {e}")
        else:
            is_inline = content_disposition.is_inline()
            if filename:
                # Sanitize before the fallback checks: a name ending in a path
                # separator (like "foo/") has an empty basename, and should
                # fall through to the fallbacks instead of being returned as
                # an empty filename.
                filename = os.path.basename(filename)
    # Then try to get filename from url
    if not filename:
        filename = reply.url().path().rstrip('/')
    # If that fails as well, use a fallback
    if not filename:
        filename = 'qutebrowser-download'
    return is_inline, os.path.basename(filename)
def parse_content_type(reply):
    """Split the Content-Type header of a reply into mimetype and rest.

    This is intentionally a very cheap parse, as only the mimetype is of
    interest. Whatever follows the first ';' is handed back untouched.

    Args:
        reply: The QNetworkReply to handle.

    Return:
        A [mimetype, rest] list, or [None, None] if unset.
        Rest can be None.
    """
    header_value = reply.header(QNetworkRequest.KnownHeaders.ContentTypeHeader)
    if header_value is None:
        return [None, None]

    # Only the first ';' separates the mimetype from the remainder; without
    # any ';' there is no remainder at all.
    mimetype, separator, rest = header_value.partition(';')
    return [mimetype.strip(), rest if separator else None]
|