summary | refs | log | tree | commit | diff
path: root/searx/plugins
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2023-04-01 12:34:58 +0200
committer: Markus Heiser <markus.heiser@darmarit.de> 2023-04-01 19:42:49 +0200
commit: 66810ce71122183f52446cb110dc76e4cae9b7ba (patch)
tree: 5849c9772523724f42cd58d92c98a361f379e941 /searx/plugins
parent: 7592d85982d0878940b4c9d57e78e51047adf8d7 (diff)
download: searxng-66810ce71122183f52446cb110dc76e4cae9b7ba.tar.gz
download: searxng-66810ce71122183f52446cb110dc76e4cae9b7ba.zip
[mod] limiter: minor improvements
- requests without HTTP header 'Connection' or missing 'User-Agent' will be
  blocked by the limiter
- re_bot is related to 'User-Agent' and has been renamed to block_user_agent

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/plugins')
-rw-r--r-- searx/plugins/limiter.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index baf2c1726..e1cd0e408 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -26,13 +26,17 @@ default_on = False
preference_section = 'service'
logger = logger.getChild('limiter')
-re_bot = re.compile(
+block_user_agent = re.compile(
r'('
- + r'[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
+ + r'unknown'
+ + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
+ r'|ZmEu|BLEXBot|bitlybot'
+ # when you block requests from Farside instances, your instance will
+ # disappear from https://farside.link/
+ # + r'|Farside'
+ r')'
)
@@ -40,14 +44,15 @@ re_bot = re.compile(
def is_accepted_request() -> bool:
# pylint: disable=too-many-return-statements
redis_client = redisdb.client()
- user_agent = request.headers.get('User-Agent', '')
+ user_agent = request.headers.get('User-Agent', 'unknown')
x_forwarded_for = request.headers.get('X-Forwarded-For', '')
- if re_bot.match(user_agent):
- logger.debug("BLOCK %s: detected bot", x_forwarded_for)
+ if block_user_agent.match(user_agent):
+ logger.debug("BLOCK %s: %s --> detected User-Agent: %s" % (x_forwarded_for, request.path, user_agent))
return False
if request.path == '/search':
+
c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
if c_burst > 15 or c_10min > 150: