summaryrefslogtreecommitdiff
path: root/searx/botdetection/http_accept_encoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/botdetection/http_accept_encoding.py')
-rw-r--r--searx/botdetection/http_accept_encoding.py26
1 files changed, 26 insertions, 0 deletions
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
new file mode 100644
index 000000000..ae630fd68
--- /dev/null
+++ b/searx/botdetection/http_accept_encoding.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept_encoding``
+-------------------------------
+
+The ``http_accept_encoding`` method evaluates a request as the request of a
+bot if the Accept-Encoding_ header ..
+
+- contains neither ``gzip`` nor ``deflate`` (the request is only rejected if
+  both values are missing)
+
+.. _Accept-Encoding:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
    """Flag a request as a bot if ``Accept-Encoding`` offers neither gzip nor deflate.

    The header value is split on commas.  For each entry, any parameter after
    a ``;`` (for example the weight in ``gzip;q=0.8``) is dropped and the
    remaining coding name is stripped and lower-cased, so ``GZIP`` and
    ``gzip;q=1.0`` are recognised just like a plain ``gzip``.  The weight
    itself is not evaluated.

    :param request: incoming request; only ``request.headers`` is read.
    :return: ``(429, message)`` if neither ``gzip`` nor ``deflate`` is listed
        (this includes a missing or empty header), otherwise ``None``.
    """
    header_value = request.headers.get('Accept-Encoding', '')
    # Compare only the coding names: names are case-insensitive and may be
    # followed by ";q=<weight>", which must not defeat the membership test.
    codings = {entry.partition(';')[0].strip().lower() for entry in header_value.split(',')}
    if codings.isdisjoint(('gzip', 'deflate')):
        return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
    return None