diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2023-05-26 17:24:43 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-06-01 14:38:53 +0200 |
commit | 66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb (patch) | |
tree | 2ce8a9ecf1cdae68e4745c24cab01482d07a5447 /searx/botdetection | |
parent | 1ec325adccc427fe05cf08da9a2d9d63da7365f4 (diff) | |
download | searxng-66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb.tar.gz searxng-66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb.zip |
[mod] limiter: add config file /etc/searxng/limiter.toml
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/botdetection')
-rw-r--r-- | searx/botdetection/http_accept.py | 5 |
-rw-r--r-- | searx/botdetection/http_accept_encoding.py | 5 |
-rw-r--r-- | searx/botdetection/http_accept_language.py | 6 |
-rw-r--r-- | searx/botdetection/http_connection.py | 6 |
-rw-r--r-- | searx/botdetection/http_user_agent.py | 6 |
-rw-r--r-- | searx/botdetection/ip_limit.py | 11 |
-rw-r--r-- | searx/botdetection/limiter.py | 43 |
-rw-r--r-- | searx/botdetection/limiter.toml | 3 |
8 files changed, 73 insertions, 12 deletions
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py index 1ab7cb4c1..23670a283 100644 --- a/searx/botdetection/http_accept.py +++ b/searx/botdetection/http_accept.py @@ -13,12 +13,15 @@ Accept_ header .. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept """ +# pylint: disable=unused-argument from typing import Optional, Tuple import flask +from searx.tools import config -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: + +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: if 'text/html' not in request.accept_mimetypes: return 429, "bot detected, HTTP header Accept did not contain text/html" return None diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py index ae630fd68..191249711 100644 --- a/searx/botdetection/http_accept_encoding.py +++ b/searx/botdetection/http_accept_encoding.py @@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header .. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding """ +# pylint: disable=unused-argument from typing import Optional, Tuple import flask +from searx.tools import config -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: + +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')] if not ('gzip' in accept_list or 'deflate' in accept_list): return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate" diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py index 06743802e..558a216cf 100644 --- a/searx/botdetection/http_accept_language.py +++ b/searx/botdetection/http_accept_language.py @@ -11,13 +11,15 @@ if the Accept-Language_ header is unset. 
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent """ - +# pylint: disable=unused-argument from typing import Optional, Tuple import flask +from searx.tools import config + -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: if request.headers.get('Accept-Language', '').strip() == '': return 429, "bot detected, missing HTTP header Accept-Language" return None diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py index f61f5e48c..0ef24a7b8 100644 --- a/searx/botdetection/http_connection.py +++ b/searx/botdetection/http_connection.py @@ -11,13 +11,15 @@ the Connection_ header is set to ``close``. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection """ - +# pylint: disable=unused-argument from typing import Optional, Tuple import flask +from searx.tools import config + -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: if request.headers.get('Connection', '').strip() == 'close': return 429, "bot detected, HTTP header 'Connection=close'" return None diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py index 892ae0bd9..3d1ec9173 100644 --- a/searx/botdetection/http_user_agent.py +++ b/searx/botdetection/http_user_agent.py @@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent """ +# pylint: disable=unused-argument from typing import Optional, Tuple import re import flask +from searx.tools import config + + USER_AGENT = ( r'(' + r'unknown' @@ -44,7 +48,7 @@ def regexp_user_agent(): return _regexp -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> 
Optional[Tuple[int, str]]: user_agent = request.headers.get('User-Agent', 'unknown') if regexp_user_agent().match(user_agent): return ( diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py index fce3f8b67..2646920c2 100644 --- a/searx/botdetection/ip_limit.py +++ b/searx/botdetection/ip_limit.py @@ -1,4 +1,5 @@ -""" +""".. _botdetection.ip_limit: + Method ``ip_limit`` ------------------- @@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is from typing import Optional, Tuple import flask +from searx.tools import config + from searx import redisdb from searx import logger @@ -56,7 +59,7 @@ API_MAX = 4 """Maximum requests from one IP in the :py:obj:`API_WONDOW`""" -def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: +def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]: redis_client = redisdb.client() x_forwarded_for = request.headers.get('X-Forwarded-For', '') @@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: if c > API_MAX: return 429, "BLOCK %s: API limit exceeded" - suspicious = link_token.is_suspicious(request) + suspicious = False + if cfg['botdetection.ip_limit.link_token']: + suspicious = link_token.is_suspicious(request) if suspicious: c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW) diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py index 71044c312..cc1e00b3c 100644 --- a/searx/botdetection/limiter.py +++ b/searx/botdetection/limiter.py @@ -38,8 +38,11 @@ and set the redis-url connection. 
Check the value, it depends on your redis DB """ from typing import Optional, Tuple +from pathlib import Path import flask +import pytomlpp as toml +from searx.tools import config from searx.botdetection import ( http_accept, http_accept_encoding, @@ -49,6 +52,42 @@ from searx.botdetection import ( ip_limit, ) +LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml" +"""Base configuration (schema) of the botdetection.""" + +LIMITER_CFG = Path('/etc/searxng/limiter.toml') +"""Lokal Limiter configuration.""" + +CFG_DEPRECATED = { + # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config." +} + +CFG = config.Config({}, {}) + + +def init_cfg(log): + global CFG # pylint: disable=global-statement + CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED) + + if not LIMITER_CFG.exists(): + log.warning("missing config file: %s", LIMITER_CFG) + return + + log.warning("load config file: %s", LIMITER_CFG) + try: + upd_cfg = toml.load(LIMITER_CFG) + except toml.DecodeError as exc: + msg = str(exc).replace('\t', '').replace('\n', ' ') + log.error("%s: %s", LIMITER_CFG, msg) + raise + + is_valid, issue_list = CFG.validate(upd_cfg) + for msg in issue_list: + log.error(str(msg)) + if not is_valid: + raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't cutomize limiter configuration from!") + CFG.update(upd_cfg) + def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: @@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: for func in [ http_user_agent, ]: - val = func.filter_request(request) + val = func.filter_request(request, CFG) if val is not None: return val @@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]: http_user_agent, ip_limit, ]: - val = func.filter_request(request) + val = func.filter_request(request, CFG) if val is not None: return val diff --git a/searx/botdetection/limiter.toml 
b/searx/botdetection/limiter.toml new file mode 100644 index 000000000..30cd1b53c --- /dev/null +++ b/searx/botdetection/limiter.toml @@ -0,0 +1,3 @@ +[botdetection.ip_limit] + +link_token = true
\ No newline at end of file