summaryrefslogtreecommitdiff
path: root/searx/botdetection
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2023-05-26 17:24:43 +0200
committerMarkus Heiser <markus.heiser@darmarit.de>2023-06-01 14:38:53 +0200
commit66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb (patch)
tree2ce8a9ecf1cdae68e4745c24cab01482d07a5447 /searx/botdetection
parent1ec325adccc427fe05cf08da9a2d9d63da7365f4 (diff)
downloadsearxng-66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb.tar.gz
searxng-66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb.zip
[mod] limiter: add config file /etc/searxng/limiter.toml
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/botdetection')
-rw-r--r--searx/botdetection/http_accept.py5
-rw-r--r--searx/botdetection/http_accept_encoding.py5
-rw-r--r--searx/botdetection/http_accept_language.py6
-rw-r--r--searx/botdetection/http_connection.py6
-rw-r--r--searx/botdetection/http_user_agent.py6
-rw-r--r--searx/botdetection/ip_limit.py11
-rw-r--r--searx/botdetection/limiter.py43
-rw-r--r--searx/botdetection/limiter.toml3
8 files changed, 73 insertions, 12 deletions
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
index 1ab7cb4c1..23670a283 100644
--- a/searx/botdetection/http_accept.py
+++ b/searx/botdetection/http_accept.py
@@ -13,12 +13,15 @@ Accept_ header ..
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
"""
+# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
+from searx.tools import config
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if 'text/html' not in request.accept_mimetypes:
return 429, "bot detected, HTTP header Accept did not contain text/html"
return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
index ae630fd68..191249711 100644
--- a/searx/botdetection/http_accept_encoding.py
+++ b/searx/botdetection/http_accept_encoding.py
@@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header ..
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
"""
+# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
+from searx.tools import config
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
if not ('gzip' in accept_list or 'deflate' in accept_list):
return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
index 06743802e..558a216cf 100644
--- a/searx/botdetection/http_accept_language.py
+++ b/searx/botdetection/http_accept_language.py
@@ -11,13 +11,15 @@ if the Accept-Language_ header is unset.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
"""
-
+# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
+from searx.tools import config
+
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if request.headers.get('Accept-Language', '').strip() == '':
return 429, "bot detected, missing HTTP header Accept-Language"
return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
index f61f5e48c..0ef24a7b8 100644
--- a/searx/botdetection/http_connection.py
+++ b/searx/botdetection/http_connection.py
@@ -11,13 +11,15 @@ the Connection_ header is set to ``close``.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
"""
-
+# pylint: disable=unused-argument
from typing import Optional, Tuple
import flask
+from searx.tools import config
+
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
if request.headers.get('Connection', '').strip() == 'close':
return 429, "bot detected, HTTP header 'Connection=close'"
return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
index 892ae0bd9..3d1ec9173 100644
--- a/searx/botdetection/http_user_agent.py
+++ b/searx/botdetection/http_user_agent.py
@@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
"""
+# pylint: disable=unused-argument
from typing import Optional, Tuple
import re
import flask
+from searx.tools import config
+
+
USER_AGENT = (
r'('
+ r'unknown'
@@ -44,7 +48,7 @@ def regexp_user_agent():
return _regexp
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
user_agent = request.headers.get('User-Agent', 'unknown')
if regexp_user_agent().match(user_agent):
return (
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index fce3f8b67..2646920c2 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -1,4 +1,5 @@
-"""
+""".. _botdetection.ip_limit:
+
Method ``ip_limit``
-------------------
@@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is
from typing import Optional, Tuple
import flask
+from searx.tools import config
+
from searx import redisdb
from searx import logger
@@ -56,7 +59,7 @@ API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
redis_client = redisdb.client()
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
@@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
if c > API_MAX:
return 429, "BLOCK %s: API limit exceeded"
- suspicious = link_token.is_suspicious(request)
+ suspicious = False
+ if cfg['botdetection.ip_limit.link_token']:
+ suspicious = link_token.is_suspicious(request)
if suspicious:
c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
index 71044c312..cc1e00b3c 100644
--- a/searx/botdetection/limiter.py
+++ b/searx/botdetection/limiter.py
@@ -38,8 +38,11 @@ and set the redis-url connection. Check the value, it depends on your redis DB
"""
from typing import Optional, Tuple
+from pathlib import Path
import flask
+import pytomlpp as toml
+from searx.tools import config
from searx.botdetection import (
http_accept,
http_accept_encoding,
@@ -49,6 +52,42 @@ from searx.botdetection import (
ip_limit,
)
+LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
+"""Base configuration (schema) of the botdetection."""
+
+LIMITER_CFG = Path('/etc/searxng/limiter.toml')
+"""Local limiter configuration."""
+
+CFG_DEPRECATED = {
+ # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
+}
+
+CFG = config.Config({}, {})
+
+
+def init_cfg(log):
+ global CFG # pylint: disable=global-statement
+ CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)
+
+ if not LIMITER_CFG.exists():
+ log.warning("missing config file: %s", LIMITER_CFG)
+ return
+
+ log.warning("load config file: %s", LIMITER_CFG)
+ try:
+ upd_cfg = toml.load(LIMITER_CFG)
+ except toml.DecodeError as exc:
+ msg = str(exc).replace('\t', '').replace('\n', ' ')
+ log.error("%s: %s", LIMITER_CFG, msg)
+ raise
+
+ is_valid, issue_list = CFG.validate(upd_cfg)
+ for msg in issue_list:
+ log.error(str(msg))
+ if not is_valid:
+ raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't cutomize limiter configuration from!")
+ CFG.update(upd_cfg)
+
def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
@@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
for func in [
http_user_agent,
]:
- val = func.filter_request(request)
+ val = func.filter_request(request, CFG)
if val is not None:
return val
@@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
http_user_agent,
ip_limit,
]:
- val = func.filter_request(request)
+ val = func.filter_request(request, CFG)
if val is not None:
return val
diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml
new file mode 100644
index 000000000..30cd1b53c
--- /dev/null
+++ b/searx/botdetection/limiter.toml
@@ -0,0 +1,3 @@
+[botdetection.ip_limit]
+
+link_token = true \ No newline at end of file