diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2023-05-28 18:58:31 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-06-01 14:38:53 +0200 |
commit | b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4 (patch) | |
tree | e91de3d7005d6f55cce0f9b2f4ec69623fc196e7 /searx/botdetection/_helpers.py | |
parent | 52f1452c09ab2ec74aa5898d9ea749f33a71a814 (diff) | |
download | searxng-b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4.tar.gz searxng-b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4.zip |
[mod] botdetection - improve ip_limit and link_token methods
- counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the
request is validated by the link_token method [1]
- renew the ping-key on validation [2]; this is needed for infinite scrolling,
where no new token (CSS) is loaded. Note: this does not fix the BURST_MAX issue
in the vanilla limiter
- normalize the counter names of the ip_limit method to 'ip_limit.*'
- integrate the ip_limit method directly into the limiter plugin, without
intermediate code: ip_limit now returns None or a werkzeug.Response object
that the plugin can pass to the flask application, instead of going through
intermediate code that returns a tuple
[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277
[2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206
[3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/botdetection/_helpers.py')
-rw-r--r-- | searx/botdetection/_helpers.py | 93 |
1 files changed, 93 insertions, 0 deletions
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring, invalid-name

from typing import Optional
import flask
import werkzeug

from searx import logger

logger = logger.getChild('botdetection')


def dump_request(request: flask.Request):
    """Return a one-line summary of ``request`` intended for log output.

    The summary consists of ``||`` separated fields: the client IP (as
    determined by :py:obj:`get_real_ip`) together with the request path, the
    raw ``X-Forwarded-For`` and ``X-Real-IP`` headers, the submitted form and a
    selection of further HTTP headers.  A header that is not set is rendered as
    ``None``.
    """
    # get_real_ip() logs on its own, call it first to keep the log order stable
    client_ip = get_real_ip(request)
    headers = request.headers

    fields = [f"{client_ip!s}: {request.path!s}"]
    fields.append(f"X-Forwarded-For: {headers.get('X-Forwarded-For')!s}")
    fields.append(f"X-Real-IP: {headers.get('X-Real-IP')!s}")
    fields.append(f"form: {request.form!s}")

    for name in (
        'Accept',
        'Accept-Language',
        'Accept-Encoding',
        'Content-Type',
        'Content-Length',
        'Connection',
        'User-Agent',
    ):
        fields.append(f"{name}: {headers.get(name)!s}")

    return " || ".join(fields)


def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]:
    """Log ``log_msg`` (debug level) and build a *429 Too Many Requests* response.

    The log message is prefixed by ``BLOCK <ip>: `` where ``<ip>`` is the
    client IP returned by :py:obj:`get_real_ip`.  The returned object is the
    response created by :py:obj:`flask.make_response`.
    """
    client_ip = get_real_ip(request)
    logger.debug(f"BLOCK {client_ip!s}: " + log_msg)
    return flask.make_response(('Too Many Requests', 429))


def get_real_ip(request: flask.Request) -> str:
    """Determine the IP of the client that sent ``request``.

    Not every proxy sets every HTTP header and incoming headers can be faked,
    therefore the IP can not always be determined correctly.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).

    The candidates listed below are inspected and the first one that yields a
    value wins.  In addition, a missing header is logged as an error and any
    disagreement between two candidates is logged as a warning.

    - X-Forwarded-For_ header: the comma separated list is counted from the
      right, the entry at position ``real_ip.x_for`` (limiter configuration) is
      taken, or the leftmost entry if the list is shorter than that.
    - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
    - :py:obj:`flask.Request.remote_addr`

    If none of the candidates yields a value, ``'0.0.0.0'`` is returned.

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
      https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    """

    xff_header = request.headers.get("X-Forwarded-For")
    x_real_ip = request.headers.get('X-Real-IP')
    wsgi_addr = request.remote_addr
    logger.debug(
        "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", xff_header, x_real_ip, wsgi_addr
    )

    if xff_header:
        # imported lazily, the limiter module imports this module in turn
        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import

        hops = [hop.strip() for hop in xff_header.split(',')]
        x_for: int = get_cfg()['real_ip.x_for']
        # count x_for entries from the right, but never beyond the list start
        xff_ip = hops[-min(len(hops), x_for)]
    else:
        logger.error("X-Forwarded-For header is not set!")
        # keep the falsy header value (None or '') so the fallbacks below apply
        xff_ip = xff_header

    if not x_real_ip:
        logger.error("X-Real-IP header is not set!")

    # each entry: log format, IP reported first, IP reported second
    consistency_checks = (
        ("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", x_real_ip, xff_ip),
        ("IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", wsgi_addr, xff_ip),
        ("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", wsgi_addr, x_real_ip),
    )
    for log_fmt, first_ip, second_ip in consistency_checks:
        # only compare candidates that are both available
        if first_ip and second_ip and first_ip != second_ip:
            logger.warning(log_fmt, first_ip, second_ip)

    request_ip = xff_ip or x_real_ip or wsgi_addr or '0.0.0.0'
    logger.debug("get_real_ip() -> %s", request_ip)
    return request_ip