summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/admin/engines/settings.rst2
-rw-r--r--docs/src/searx.botdetection.rst45
-rw-r--r--docs/src/searx.plugins.limiter.rst13
-rw-r--r--searx/botdetection/__init__.py26
-rw-r--r--searx/botdetection/http_accept.py24
-rw-r--r--searx/botdetection/http_accept_encoding.py26
-rw-r--r--searx/botdetection/http_accept_language.py23
-rw-r--r--searx/botdetection/http_connection.py23
-rw-r--r--searx/botdetection/http_user_agent.py54
-rw-r--r--searx/botdetection/ip_limit.py90
-rw-r--r--searx/botdetection/limiter.py79
-rw-r--r--searx/botdetection/link_token.py126
-rw-r--r--searx/plugins/limiter.py155
-rw-r--r--searx/templates/simple/base.html2
-rwxr-xr-xsearx/webapp.py12
15 files changed, 540 insertions, 160 deletions
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index f9a1dad4f..63478f441 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -235,7 +235,7 @@ Global Settings
``limiter`` :
Rate limit the number of request on the instance, block some bots. The
- :ref:`limiter plugin` requires a :ref:`settings redis` database.
+ :ref:`limiter src` requires a :ref:`settings redis` database.
.. _image_proxy:
diff --git a/docs/src/searx.botdetection.rst b/docs/src/searx.botdetection.rst
new file mode 100644
index 000000000..85e0ce4cd
--- /dev/null
+++ b/docs/src/searx.botdetection.rst
@@ -0,0 +1,45 @@
+.. _botdetection:
+
+=============
+Bot Detection
+=============
+
+.. contents:: Contents
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+.. automodule:: searx.botdetection
+ :members:
+
+.. automodule:: searx.botdetection.limiter
+ :members:
+
+
+Rate limit
+==========
+
+.. automodule:: searx.botdetection.ip_limit
+ :members:
+
+.. automodule:: searx.botdetection.link_token
+ :members:
+
+
+Probe HTTP headers
+==================
+
+.. automodule:: searx.botdetection.http_accept
+ :members:
+
+.. automodule:: searx.botdetection.http_accept_encoding
+ :members:
+
+.. automodule:: searx.botdetection.http_accept_language
+ :members:
+
+.. automodule:: searx.botdetection.http_connection
+ :members:
+
+.. automodule:: searx.botdetection.http_user_agent
+ :members:
diff --git a/docs/src/searx.plugins.limiter.rst b/docs/src/searx.plugins.limiter.rst
deleted file mode 100644
index 75d06f5c2..000000000
--- a/docs/src/searx.plugins.limiter.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-.. _limiter plugin:
-
-==============
-Limiter Plugin
-==============
-
-.. sidebar:: info
-
- The :ref:`limiter plugin` requires a :ref:`Redis <settings redis>` database.
-
-.. automodule:: searx.plugins.limiter
- :members:
-
diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
new file mode 100644
index 000000000..78a7d30f3
--- /dev/null
+++ b/searx/botdetection/__init__.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _botdetection src:
+
+Bot detection methods
+---------------------
+
+The methods implemented in this python package are used by the :ref:`limiter src`.
+
+"""
+
+import flask
+
+
+def dump_request(request: flask.Request):
+ return (
+ "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path)
+ + " || form: %s" % request.form
+ + " || Accept: %s" % request.headers.get('Accept')
+ + " || Accept-Language: %s" % request.headers.get('Accept-Language')
+ + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
+ + " || Content-Type: %s" % request.headers.get('Content-Type')
+ + " || Content-Length: %s" % request.headers.get('Content-Length')
+ + " || Connection: %s" % request.headers.get('Connection')
+ + " || User-Agent: %s" % request.headers.get('User-Agent')
+ )
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
new file mode 100644
index 000000000..1ab7cb4c1
--- /dev/null
+++ b/searx/botdetection/http_accept.py
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept``
+----------------------
+
+The ``http_accept`` method evaluates a request as the request of a bot if the
+Accept_ header ..
+
+- did not contain ``text/html``
+
+.. _Accept:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ if 'text/html' not in request.accept_mimetypes:
+ return 429, "bot detected, HTTP header Accept did not contain text/html"
+ return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
new file mode 100644
index 000000000..ae630fd68
--- /dev/null
+++ b/searx/botdetection/http_accept_encoding.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept_encoding``
+-------------------------------
+
+The ``http_accept_encoding`` method evaluates a request as the request of a
+bot if the Accept-Encoding_ header ..
+
+- did not contain ``gzip`` AND did not contain ``deflate`` (the request is
+  blocked only if both values are missing)
+
+.. _Accept-Encoding:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
+ if not ('gzip' in accept_list or 'deflate' in accept_list):
+ return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
+ return None
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
new file mode 100644
index 000000000..06743802e
--- /dev/null
+++ b/searx/botdetection/http_accept_language.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept_language``
+-------------------------------
+
+The ``http_accept_language`` method evaluates a request as the request of a bot
+if the Accept-Language_ header is unset.
+
+.. _Accept-Language:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
+
+"""
+
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ if request.headers.get('Accept-Language', '').strip() == '':
+ return 429, "bot detected, missing HTTP header Accept-Language"
+ return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
new file mode 100644
index 000000000..f61f5e48c
--- /dev/null
+++ b/searx/botdetection/http_connection.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_connection``
+--------------------------
+
+The ``http_connection`` method evaluates a request as the request of a bot if
+the Connection_ header is set to ``close``.
+
+.. _Connection:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
+
+"""
+
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ if request.headers.get('Connection', '').strip() == 'close':
+ return 429, "bot detected, HTTP header 'Connection=close'"
+ return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
new file mode 100644
index 000000000..892ae0bd9
--- /dev/null
+++ b/searx/botdetection/http_user_agent.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_user_agent``
+--------------------------
+
+The ``http_user_agent`` method evaluates a request as the request of a bot if
+the User-Agent_ header is unset or matches the regular expression
+:py:obj:`USER_AGENT`.
+
+.. _User-Agent:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
+
+"""
+
+from typing import Optional, Tuple
+import re
+import flask
+
+USER_AGENT = (
+ r'('
+ + r'unknown'
+ + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
+ + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
+ + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
+ + r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
+ + r'|ZmEu|BLEXBot|bitlybot'
+ # unmaintained Farside instances
+ + r'|'
+ + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
+ # other bots and client to block
+ + '|.*PetalBot.*'
+ + r')'
+)
+"""Regular expression that matches the User-Agent_ of known *bots*"""
+
+_regexp = None
+
+
+def regexp_user_agent():
+ global _regexp # pylint: disable=global-statement
+ if not _regexp:
+ _regexp = re.compile(USER_AGENT)
+ return _regexp
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ user_agent = request.headers.get('User-Agent', 'unknown')
+ if regexp_user_agent().match(user_agent):
+ return (
+ 429,
+ f"bot detected, HTTP header User-Agent: {user_agent}",
+ )
+ return None
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
new file mode 100644
index 000000000..fce3f8b67
--- /dev/null
+++ b/searx/botdetection/ip_limit.py
@@ -0,0 +1,90 @@
+"""
+Method ``ip_limit``
+-------------------
+
+The ``ip_limit`` method counts requests from an IP in *sliding windows*. If
+there are too many requests in a sliding window, the request is evaluated as a
+bot request. This method requires a redis DB and needs an HTTP X-Forwarded-For_
+header. To protect privacy only the hash value of an IP is stored in the redis
+DB, and for a maximum of 10 minutes.
+
+The :py:obj:`link_token` method is used to investigate whether a request is
+*suspicious*. If the :py:obj:`link_token` method is activated and a request is
+*suspicious* the request rates are reduced:
+
+- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
+- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
+
+.. _X-Forwarded-For:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+from searx import redisdb
+from searx import logger
+from searx.redislib import incr_sliding_window
+
+from . import link_token
+
+logger = logger.getChild('botdetection.ip_limit')
+
+BURST_WINDOW = 20
+"""Time (sec) before sliding window for *burst* requests expires."""
+
+BURST_MAX = 15
+"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
+
+BURST_MAX_SUSPICIOUS = 2
+"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
+
+LONG_WINDOW = 600
+"""Time (sec) before the longer sliding window expires."""
+
+LONG_MAX = 150
+"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
+
+LONG_MAX_SUSPICIOUS = 10
+"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
+
+API_WONDOW = 3600
+"""Time (sec) before sliding window for API requests (format != html) expires."""
+
+API_MAX = 4
+"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+ redis_client = redisdb.client()
+
+ x_forwarded_for = request.headers.get('X-Forwarded-For', '')
+ if not x_forwarded_for:
+ logger.error("missing HTTP header X-Forwarded-For")
+
+ if request.args.get('format', 'html') != 'html':
+ c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW)
+ if c > API_MAX:
+ return 429, "BLOCK %s: API limit exceeded" % x_forwarded_for
+
+ suspicious = link_token.is_suspicious(request)
+
+ if suspicious:
+ c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
+ if c > BURST_MAX_SUSPICIOUS:
+ return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS"
+
+ c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
+ if c > LONG_MAX_SUSPICIOUS:
+ return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS"
+
+ else:
+ c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
+ if c > BURST_MAX:
+ return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX"
+
+ c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
+ if c > LONG_MAX:
+ return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX"
+ return None
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
new file mode 100644
index 000000000..71044c312
--- /dev/null
+++ b/searx/botdetection/limiter.py
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _limiter src:
+
+Limiter
+=======
+
+.. sidebar:: info
+
+ The limiter requires a :ref:`Redis <settings redis>` database.
+
+Bot protection / IP rate limitation. The intention of rate limitation is to
+limit suspicious requests from an IP. The motivation behind this is the fact
+that SearXNG passes through requests from bots and is thus classified as a bot
+itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked
+by the search engine (the origin) in some other way.
+
+To avoid blocking, the requests from bots to SearXNG must also be blocked, this
+is the task of the limiter. To perform this task, the limiter uses the methods
+from the :py:obj:`searx.botdetection`.
+
+To enable the limiter activate:
+
+.. code:: yaml
+
+ server:
+ ...
+ limiter: true # rate limit the number of request on the instance, block some bots
+
+and set the redis-url connection. Check the value, it depends on your redis DB
+(see :ref:`settings redis`), for example:
+
+.. code:: yaml
+
+ redis:
+ url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+from searx.botdetection import (
+ http_accept,
+ http_accept_encoding,
+ http_accept_language,
+ http_connection,
+ http_user_agent,
+ ip_limit,
+)
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+ if request.path == '/healthz':
+ return None
+
+ for func in [
+ http_user_agent,
+ ]:
+ val = func.filter_request(request)
+ if val is not None:
+ return val
+
+ if request.path == '/search':
+
+ for func in [
+ http_accept,
+ http_accept_encoding,
+ http_accept_language,
+ http_connection,
+ http_user_agent,
+ ip_limit,
+ ]:
+ val = func.filter_request(request)
+ if val is not None:
+ return val
+
+ return None
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
new file mode 100644
index 000000000..8ef215f6c
--- /dev/null
+++ b/searx/botdetection/link_token.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``link_token``
+---------------------
+
+The ``link_token`` method evaluates a request as :py:obj:`suspicious
+<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
+client. By adding a random component (the token) in the URL a bot cannot send
+a ping by requesting a static URL.
+
+.. note::
+
+ This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
+
+To make use of this method a flask URL route needs to be added:
+
+.. code:: python
+
+ @app.route('/client<token>.css', methods=['GET', 'POST'])
+ def client_token(token=None):
+ link_token.ping(request, token)
+ return Response('', mimetype='text/css')
+
+And in the HTML template from flask a stylesheet link is needed (the value of
+``link_token`` comes from :py:obj:`get_token`):
+
+.. code:: html
+
+ <link rel="stylesheet"
+ href="{{ url_for('client_token', token=link_token) }}"
+ type="text/css" />
+
+.. _X-Forwarded-For:
+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+"""
+
+import string
+import random
+import flask
+
+from searx import logger
+from searx import redisdb
+from searx.redislib import secret_hash
+
+TOKEN_LIVE_TIME = 600
+"""Lifetime (sec) of limiter's CSS token."""
+
+PING_KEY = 'SearXNG_limiter.ping'
+TOKEN_KEY = 'SearXNG_limiter.token'
+
+logger = logger.getChild('botdetection.link_token')
+
+
+def is_suspicious(request: flask.Request):
+ """Checks if there is a valid ping for this request, if not this request is
+ rated as *suspicious*"""
+ redis_client = redisdb.client()
+ if not redis_client:
+ return False
+
+ ping_key = get_ping_key(request)
+ if not redis_client.get(ping_key):
+ logger.warning(
+ "missing ping (IP: %s) / request: %s",
+ request.headers.get('X-Forwarded-For', ''),
+ ping_key,
+ )
+ return True
+
+ logger.debug("found ping for this request: %s", ping_key)
+ return False
+
+
+def ping(request: flask.Request, token: str):
+ """This function is called by a request to URL ``/client<token>.css``"""
+ redis_client = redisdb.client()
+ if not redis_client:
+ return
+ if not token_is_valid(token):
+ return
+ ping_key = get_ping_key(request)
+ logger.debug("store ping for: %s", ping_key)
+ redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME)
+
+
+def get_ping_key(request: flask.Request):
+ """Generates a hashed key that fits (more or less) to a request. At least
+ X-Forwarded-For_ is needed to be able to assign the request to an IP.
+
+ """
+ return secret_hash(
+ PING_KEY
+ + request.headers.get('X-Forwarded-For', '')
+ + request.headers.get('Accept-Language', '')
+ + request.headers.get('User-Agent', '')
+ )
+
+
+def token_is_valid(token) -> bool:
+ valid = token == get_token()
+ logger.debug("token is valid --> %s", valid)
+ return valid
+
+
+def get_token() -> str:
+ """Returns current token. If there is no currently active token a new token
+ is generated randomly and stored in the redis DB.
+
+ - :py:obj:`TOKEN_LIVE_TIME`
+ - :py:obj:`TOKEN_KEY`
+
+ """
+ redis_client = redisdb.client()
+ if not redis_client:
+ # This function is also called when limiter is inactive / no redis DB
+ # (see render function in webapp.py)
+ return '12345678'
+ token = redis_client.get(TOKEN_KEY)
+ if token:
+ token = token.decode('UTF-8')
+ else:
+ token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
+ redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
+ return token
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index 69bd576d4..d9566b92b 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -1,165 +1,42 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pyright: basic
-"""Some bot protection / rate limitation
+"""see :ref:`limiter src`"""
-To monitor rate limits and protect privacy the IP addresses are getting stored
-with a hash so the limiter plugin knows who to block. A redis database is
-needed to store the hash values.
-
-Enable the plugin in ``settings.yml``:
-
-- ``server.limiter: true``
-- ``redis.url: ...`` check the value, see :ref:`settings redis`
-"""
-
-import re
-import string
-import random
-from flask import request
+import flask
from searx import redisdb
from searx.plugins import logger
-from searx.redislib import incr_sliding_window, secret_hash
+from searx.botdetection import limiter
+from searx.botdetection import dump_request
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'
-logger = logger.getChild('limiter')
-
-block_user_agent = re.compile(
- r'('
- + r'unknown'
- + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
- + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
- + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
- + r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
- + r'|ZmEu|BLEXBot|bitlybot'
- # unmaintained Farside instances
- + r'|'
- + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
- + '|.*PetalBot.*'
- + r')'
-)
-
-PING_KEY = 'SearXNG_limiter.ping'
-TOKEN_KEY = 'SearXNG_limiter.token'
-
-
-def ping():
- redis_client = redisdb.client()
- user_agent = request.headers.get('User-Agent', 'unknown')
- x_forwarded_for = request.headers.get('X-Forwarded-For', '')
-
- ping_key = PING_KEY + user_agent + x_forwarded_for
- redis_client.set(secret_hash(ping_key), 1, ex=600)
-
-
-def get_token():
- redis_client = redisdb.client()
- if not redis_client:
- # This function is also called when limiter is inactive / no redis DB
- # (see render function in webapp.py)
- return '12345678'
- token = redis_client.get(TOKEN_KEY)
- if token:
- token = token.decode('UTF-8')
- else:
- token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
- redis_client.set(TOKEN_KEY, token, ex=600)
- return token
-
-
-def token_is_valid(token):
- valid = token == get_token()
- logger.debug("token is valid --> %s", valid)
- return valid
-
-
-def is_accepted_request() -> bool:
- # pylint: disable=too-many-return-statements
- redis_client = redisdb.client()
- user_agent = request.headers.get('User-Agent', 'unknown')
- x_forwarded_for = request.headers.get('X-Forwarded-For', '')
-
- if request.path == '/healthz':
- return True
- if block_user_agent.match(user_agent):
- logger.debug("BLOCK %s: %s --> detected User-Agent: %s" % (x_forwarded_for, request.path, user_agent))
- return False
-
- if request.path == '/search':
-
- c_burst_max = 2
- c_10min_max = 10
-
- ping_key = PING_KEY + user_agent + x_forwarded_for
- if redis_client.get(secret_hash(ping_key)):
- logger.debug('got a ping')
- c_burst_max = 15
- c_10min_max = 150
- else:
- logger.debug('missing a ping')
-
- c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
- c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
- if c_burst > c_burst_max or c_10min > c_10min_max:
- logger.debug("BLOCK %s: too many request", x_forwarded_for)
- return False
-
- if len(request.headers.get('Accept-Language', '').strip()) == '':
- logger.debug("BLOCK %s: missing Accept-Language", x_forwarded_for)
- return False
-
- if request.headers.get('Connection') == 'close':
- logger.debug("BLOCK %s: got Connection=close", x_forwarded_for)
- return False
-
- accept_encoding_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
- if 'gzip' not in accept_encoding_list and 'deflate' not in accept_encoding_list:
- logger.debug("BLOCK %s: suspicious Accept-Encoding", x_forwarded_for)
- return False
-
- if 'text/html' not in request.accept_mimetypes:
- logger.debug("BLOCK %s: Accept-Encoding misses text/html", x_forwarded_for)
- return False
-
- if request.args.get('format', 'html') != 'html':
- c = incr_sliding_window(redis_client, 'API limit' + x_forwarded_for, 3600)
- if c > 4:
- logger.debug("BLOCK %s: API limit exceeded", x_forwarded_for)
- return False
+logger = logger.getChild('limiter')
- logger.debug(
- "OK %s: '%s'" % (x_forwarded_for, request.path)
- + " || form: %s" % request.form
- + " || Accept: %s" % request.headers.get('Accept', '')
- + " || Accept-Language: %s" % request.headers.get('Accept-Language', '')
- + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding', '')
- + " || Content-Type: %s" % request.headers.get('Content-Type', '')
- + " || Content-Length: %s" % request.headers.get('Content-Length', '')
- + " || Connection: %s" % request.headers.get('Connection', '')
- + " || User-Agent: %s" % user_agent
- )
- return True
+def pre_request():
+ """See :py:meth:`flask.Flask.before_request`"""
+ val = limiter.filter_request(flask.request)
+ if val is not None:
+ http_status, msg = val
+ client_ip = flask.request.headers.get('X-Forwarded-For', '<unknown>')
+ logger.error("BLOCK (IP %s): %s" % (client_ip, msg))
+ return 'Too Many Requests', http_status
-def pre_request():
- if not is_accepted_request():
- return 'Too Many Requests', 429
+ logger.debug("OK: %s" % dump_request(flask.request))
return None
-def init(app, settings):
+def init(app: flask.Flask, settings) -> bool:
if not settings['server']['limiter']:
return False
-
if not redisdb.client():
- logger.error("The limiter requires Redis") # pylint: disable=undefined-variable
+ logger.error("The limiter requires Redis")
return False
-
app.before_request(pre_request)
return True
diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html
index 9f7cdbb8e..3c6ed11c7 100644
--- a/searx/templates/simple/base.html
+++ b/searx/templates/simple/base.html
@@ -18,7 +18,7 @@
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
{% endif %}
{% if get_setting('server.limiter') %}
- <link rel="stylesheet" href="{{ url_for('limiter_css', token=limiter_token) }}" type="text/css" media="screen" />
+ <link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
{% endif %}
{% block styles %}{% endblock %}
<!--[if gte IE 9]>-->
diff --git a/searx/webapp.py b/searx/webapp.py
index 815bfcabd..d6322447a 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -93,7 +93,8 @@ from searx.utils import (
)
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
from searx.query import RawTextQuery
-from searx.plugins import limiter, Plugin, plugins, initialize as plugin_initialize
+from searx.plugins import Plugin, plugins, initialize as plugin_initialize
+from searx.botdetection import link_token
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import (
Preferences,
@@ -416,7 +417,7 @@ def render(template_name: str, **kwargs):
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
kwargs['cookies'] = request.cookies
kwargs['errors'] = request.errors
- kwargs['limiter_token'] = limiter.get_token()
+ kwargs['link_token'] = link_token.get_token()
# values from the preferences
kwargs['preferences'] = request.preferences
@@ -643,10 +644,9 @@ def health():
return Response('OK', mimetype='text/plain')
-@app.route('/limiter<token>.css', methods=['GET', 'POST'])
-def limiter_css(token=None):
- if limiter.token_is_valid(token):
- limiter.ping()
+@app.route('/client<token>.css', methods=['GET', 'POST'])
+def client_token(token=None):
+ link_token.ping(request, token)
return Response('', mimetype='text/css')