1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pyright: basic
"""Some bot protection / rate limitation
To monitor rate limits while protecting privacy, IP addresses are stored as
hashes so that the limiter plugin knows which clients to block.  A Redis
database is required to store the hash values.
Enable the plugin in ``settings.yml``:
- ``server.limiter: true``
- ``redis.url: ...`` check the value, see :ref:`settings redis`
"""
import re
from flask import request
from searx import redisdb
from searx.plugins import logger
from searx.redislib import incr_sliding_window, secret_hash
# Plugin metadata.
# NOTE(review): presumably read by the searx plugin loader -- confirm there.
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'
# Rebind the logger imported from searx.plugins to its child logger 'limiter',
# so every message emitted by this module goes through that child logger.
logger = logger.getChild('limiter')
# User-Agent block list: one capturing group holding every alternative below,
# joined with '|'.  The compiled pattern is applied with ``.match()``, i.e. an
# alternative has to match at the very beginning of the User-Agent string
# (only the PetalBot entry carries its own leading ``.*``).
block_user_agent = re.compile(
    '('
    + '|'.join(
        (
            r'unknown',
            # generic HTTP clients / libraries
            r'[Cc][Uu][Rr][Ll]',
            r'[wW]get',
            r'Scrapy',
            r'splash',
            r'JavaFX',
            r'FeedFetcher',
            r'python-requests',
            r'Go-http-client',
            r'Java',
            r'Jakarta',
            r'okhttp',
            r'HttpClient',
            r'Jersey',
            r'Python',
            r'libwww-perl',
            r'Ruby',
            r'SynHttpClient',
            r'UniversalFeedParser',
            # crawlers
            r'Googlebot',
            r'GoogleImageProxy',
            r'bingbot',
            r'Baiduspider',
            r'yacybot',
            r'YandexMobileBot',
            r'YandexBot',
            r'Yahoo! Slurp',
            r'MJ12bot',
            r'AhrefsBot',
            r'archive.org_bot',
            r'msnbot',
            r'MJ12bot',
            r'SeznamBot',
            r'linkdexbot',
            r'Netvibes',
            r'SMTBot',
            r'zgrab',
            r'James BOT',
            r'Sogou',
            r'Abonti',
            r'Pixray',
            r'Spinn3r',
            r'SemrushBot',
            r'Exabot',
            r'ZmEu',
            r'BLEXBot',
            r'bitlybot',
            # unmaintained Farside instances
            re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)'),
            '.*PetalBot.*',
        )
    )
    + ')'
)
# Prefix of the string that ping() hashes and stores in Redis and that
# is_accepted_request() looks up again (prefix + User-Agent + X-Forwarded-For).
PING_KEY = 'SearXNG_limiter.ping'
# NOTE(review): not referenced anywhere in the visible part of this module.
TOKEN_KEY = 'SearXNG_limiter.token'
def ping():
    """Store a ping marker for the client of the current request in Redis.

    The marker key is ``secret_hash(PING_KEY + User-Agent + X-Forwarded-For)``
    (missing headers default to ``'unknown'`` and ``''``), its value is ``1``
    and it is written with ``ex=600``.  :py:func:`is_accepted_request` looks
    this marker up to decide which request limits apply.
    """
    store = redisdb.client()
    headers = request.headers
    agent = headers.get('User-Agent', 'unknown')
    forwarded_for = headers.get('X-Forwarded-For', '')
    marker = secret_hash(''.join((PING_KEY, agent, forwarded_for)))
    store.set(marker, 1, ex=600)
def is_accepted_request() -> bool:
    """Decide whether the current Flask request passes the limiter.

    Checks, in order:

    1. ``/healthz`` is always accepted.
    2. A User-Agent matched by ``block_user_agent`` is rejected (a missing
       User-Agent header counts as ``'unknown'``, which is on the block list).
    3. For ``/search`` only:

       - two sliding-window counters keyed by the X-Forwarded-For value
         (window arguments 20 and 600) must not exceed 2 / 10, or 15 / 150
         when a ping marker written by :py:func:`ping` is found in Redis,
       - ``Accept-Language`` must be present and non-blank,
       - ``Connection`` must not be ``close``,
       - ``Accept-Encoding`` must list ``gzip`` or ``deflate``,
       - ``Accept`` must cover ``text/html``,
       - requests with a ``format`` other than ``html`` are additionally
         limited to 4 per sliding window of 3600.

    NOTE(review): the window arguments are presumably seconds -- confirm
    against ``searx.redislib.incr_sliding_window``.

    :return: ``True`` if the request is accepted, ``False`` if it has to be
        blocked.
    """
    # pylint: disable=too-many-return-statements
    redis_client = redisdb.client()
    user_agent = request.headers.get('User-Agent', 'unknown')
    x_forwarded_for = request.headers.get('X-Forwarded-For', '')

    if request.path == '/healthz':
        return True

    if block_user_agent.match(user_agent):
        logger.debug("BLOCK %s: %s --> detected User-Agent: %s", x_forwarded_for, request.path, user_agent)
        return False

    # NOTE(review): the source this block was restored from had lost its
    # indentation; all checks below are taken to belong to the '/search'
    # branch -- confirm against the original layout.
    if request.path == '/search':

        # strict limits by default, relaxed ones for clients that sent a ping
        c_burst_max = 2
        c_10min_max = 10

        ping_key = PING_KEY + user_agent + x_forwarded_for
        if redis_client.get(secret_hash(ping_key)):
            logger.debug('got a ping')
            c_burst_max = 15
            c_10min_max = 150
        else:
            logger.debug('missing a ping')

        c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
        c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
        if c_burst > c_burst_max or c_10min > c_10min_max:
            logger.debug("BLOCK %s: to many request", x_forwarded_for)
            return False

        # The previous test compared ``len(...)`` with ``''`` which is never
        # true, i.e. a missing Accept-Language header was never detected.
        if not request.headers.get('Accept-Language', '').strip():
            logger.debug("BLOCK %s: missing Accept-Language", x_forwarded_for)
            return False

        if request.headers.get('Connection') == 'close':
            logger.debug("BLOCK %s: got Connection=close", x_forwarded_for)
            return False

        accept_encoding_list = [enc.strip() for enc in request.headers.get('Accept-Encoding', '').split(',')]
        if 'gzip' not in accept_encoding_list and 'deflate' not in accept_encoding_list:
            logger.debug("BLOCK %s: suspicious Accept-Encoding", x_forwarded_for)
            return False

        # this check is about the Accept header (not Accept-Encoding)
        if 'text/html' not in request.accept_mimetypes:
            logger.debug("BLOCK %s: Accept misses text/html", x_forwarded_for)
            return False

        if request.args.get('format', 'html') != 'html':
            c = incr_sliding_window(redis_client, 'API limit' + x_forwarded_for, 3600)
            if c > 4:
                logger.debug("BLOCK %s: API limit exceeded", x_forwarded_for)
                return False

    # lazy logging arguments: the message is only built when DEBUG is enabled
    logger.debug(
        "OK %s: '%s'"
        " || form: %s"
        " || Accept: %s"
        " || Accept-Language: %s"
        " || Accept-Encoding: %s"
        " || Content-Type: %s"
        " || Content-Length: %s"
        " || Connection: %s"
        " || User-Agent: %s",
        x_forwarded_for,
        request.path,
        request.form,
        request.headers.get('Accept', ''),
        request.headers.get('Accept-Language', ''),
        request.headers.get('Accept-Encoding', ''),
        request.headers.get('Content-Type', ''),
        request.headers.get('Content-Length', ''),
        request.headers.get('Connection', ''),
        user_agent,
    )

    return True
def pre_request():
    """Request hook that rejects requests not accepted by the limiter.

    :return: ``None`` when :py:func:`is_accepted_request` accepts the request,
        otherwise the tuple ``('Too Many Requests', 429)``.
    """
    if is_accepted_request():
        return None
    return 'Too Many Requests', 429
def init(app, settings):
    """Register the limiter on *app* if it is enabled and Redis is available.

    :param app: application object; ``pre_request`` is registered through its
        ``before_request`` method.
    :param settings: settings mapping, ``settings['server']['limiter']`` is
        read to find out whether the limiter is switched on.
    :return: ``True`` when the hook has been registered, ``False`` when the
        limiter is disabled or no Redis client is available (the latter is
        logged as an error).
    """
    limiter_enabled = settings['server']['limiter']
    if limiter_enabled:
        if redisdb.client():
            app.before_request(pre_request)
            return True
        logger.error("The limiter requires Redis")  # pylint: disable=undefined-variable
    return False
|