summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2020-11-16 12:44:07 +0100
committer Alexandre Flament <alex@al-f.net> 2020-11-20 15:29:21 +0100
commit 3786920df975b11c0feb7d8564eb19b634d32977 (patch)
tree 3d0bdc1fb29ef3a5a400dd78960cc2cae22f8e1a
parent 2fc3b17c85512280173bb47f449cc2faa64b0501 (diff)
download searxng-3786920df975b11c0feb7d8564eb19b634d32977.tar.gz
searxng-3786920df975b11c0feb7d8564eb19b634d32977.zip
[enh] Add multiple outgoing proxies
credits go to @bauruine see https://github.com/searx/searx/pull/1958
-rw-r--r-- docs/admin/settings.rst 52
-rw-r--r-- searx/engines/__init__.py 18
-rw-r--r-- searx/poolrequests.py 30
-rw-r--r-- searx/search.py 2
-rw-r--r-- searx/settings.yml 16
-rwxr-xr-x searx/webapp.py 5
-rw-r--r-- tests/unit/test_poolrequests.py 89
7 files changed, 174 insertions, 38 deletions
diff --git a/docs/admin/settings.rst b/docs/admin/settings.rst
index 17dee4da8..58bce3541 100644
--- a/docs/admin/settings.rst
+++ b/docs/admin/settings.rst
@@ -36,18 +36,26 @@ Global Settings
image_proxy : False # proxying image results through searx
default_locale : "" # default interface locale
- # uncomment below section if you want to use a proxy
-
- #outgoing_proxies :
- # http : http://127.0.0.1:8080
- # https: http://127.0.0.1:8080
-
- # uncomment below section only if you have more than one network interface
- # which can be the source of outgoing search requests
-
- #source_ips:
- # - 1.1.1.1
- # - 1.1.1.2
+ outgoing: # communication with search engines
+ request_timeout : 2.0 # default timeout in seconds, can be overridden by engine
+ # max_request_timeout: 10.0 # the maximum timeout in seconds
+ useragent_suffix : "" # suffix of searx_useragent, could contain information such as an email address for the administrator
+ pool_connections : 100 # Number of different hosts
+ pool_maxsize : 10 # Number of simultaneous requests by host
+
+ #proxies:
+ # http:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # https:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # - socks5://user:password@proxy3:1080
+ # - socks5h://user:password@proxy4:1080
+
+ #source_ips:
+ # - 1.1.1.1
+ # - 1.1.1.2
locales:
en : English
@@ -105,15 +113,16 @@ Global Settings
code, like ``fr``, ``en``, ``de``.
.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
-.. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478
+.. _PySocks: https://pypi.org/project/PySocks/
-``outgoing_proxies`` :
- Define a proxy you wish to use, see `requests proxies`_. SOCKS proxies are
- not supported / see `PR SOCKS support`.
+``proxies`` :
+ Define one or more proxies you wish to use, see `requests proxies`_.
+ If there is more than one proxy for a protocol (http, https),
+ requests to the engines are distributed in a round-robin fashion.
``source_ips`` :
If you use multiple network interfaces, define from which IP the requests must
- be made.
+ be made. This parameter is ignored when ``proxies`` is set.
``locales`` :
Locales codes and their names. Available translations of searx interface.
@@ -139,6 +148,15 @@ Engine settings
api_key : 'apikey'
disabled : True
language : en_US
+ #proxies:
+ # http:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # https:
+ # - http://proxy1:8080
+ # - http://proxy2:8080
+ # - socks5://user:password@proxy3:1080
+ # - socks5h://user:password@proxy4:1080
``name`` :
Name that will be used across searx to define this engine. In settings, on
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 00be89412..a80afb1c6 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -25,7 +25,7 @@ from operator import itemgetter
from searx import settings
from searx import logger
from searx.data import ENGINES_LANGUAGES
-from searx.poolrequests import get
+from searx.poolrequests import get, get_proxy_cycles
from searx.utils import load_module, match_language, get_engine_from_settings
@@ -79,16 +79,18 @@ def load_engine(engine_data):
logger.exception('Cannot load engine "{}"'.format(engine_module))
return None
- for param_name in engine_data:
+ for param_name, param_value in engine_data.items():
if param_name == 'engine':
- continue
- if param_name == 'categories':
- if engine_data['categories'] == 'none':
+ pass
+ elif param_name == 'categories':
+ if param_value == 'none':
engine.categories = []
else:
- engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
- continue
- setattr(engine, param_name, engine_data[param_name])
+ engine.categories = list(map(str.strip, param_value.split(',')))
+ elif param_name == 'proxies':
+ engine.proxies = get_proxy_cycles(param_value)
+ else:
+ setattr(engine, param_name, param_value)
for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name):
diff --git a/searx/poolrequests.py b/searx/poolrequests.py
index e03797ce2..1eedc84b8 100644
--- a/searx/poolrequests.py
+++ b/searx/poolrequests.py
@@ -111,6 +111,32 @@ def get_time_for_thread():
return threadLocal.total_time
+def get_proxy_cycles(proxy_settings):
+ if not proxy_settings:
+ return None
+ # Backwards compatibility for single proxy in settings.yml
+ for protocol, proxy in proxy_settings.items():
+ if isinstance(proxy, str):
+ proxy_settings[protocol] = [proxy]
+
+ for protocol in proxy_settings:
+ proxy_settings[protocol] = cycle(proxy_settings[protocol])
+ return proxy_settings
+
+
+GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
+
+
+def get_proxies(proxy_cycles):
+ if proxy_cycles:
+ return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()}
+ return None
+
+
+def get_global_proxies():
+ return get_proxies(GLOBAL_PROXY_CYCLES)
+
+
def request(method, url, **kwargs):
"""same as requests/requests/api.py request(...)"""
time_before_request = time()
@@ -119,8 +145,8 @@ def request(method, url, **kwargs):
session = SessionSinglePool()
# proxies
- if kwargs.get('proxies') is None:
- kwargs['proxies'] = settings['outgoing'].get('proxies')
+ if not kwargs.get('proxies'):
+ kwargs['proxies'] = get_global_proxies()
# timeout
if 'timeout' in kwargs:
diff --git a/searx/search.py b/searx/search.py
index 1cb2a603b..b8ada3901 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -119,7 +119,7 @@ def send_http_request(engine, request_params):
# setting engine based proxies
if hasattr(engine, 'proxies'):
- request_args['proxies'] = engine.proxies
+ request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
# specific type of request (GET or POST)
if request_params['method'] == 'GET':
diff --git a/searx/settings.yml b/searx/settings.yml
index 78ae26b97..8af1a17f1 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -63,13 +63,15 @@ outgoing: # communication with search engines
pool_connections : 100 # Number of different hosts
pool_maxsize : 10 # Number of simultaneous requests by host
# uncomment below section if you want to use a proxy
-# see http://docs.python-requests.org/en/latest/user/advanced/#proxies
-# SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
-# proxies :
-# http : socks5h://127.0.0.1:9050
-# https: socks5h://127.0.0.1:9050
-# using_tor_proxy : True
-# extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
+# see https://2.python-requests.org/en/latest/user/advanced/#proxies
+# SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
+# proxies:
+# http:
+# - http://proxy1:8080
+# - http://proxy2:8080
+# https:
+# - http://proxy1:8080
+# - http://proxy2:8080
# uncomment below section only if you have more than one network interface
# which can be the source of outgoing search requests
# source_ips:
diff --git a/searx/webapp.py b/searx/webapp.py
index 2a93c3765..26416c5aa 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -78,6 +78,7 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
+from searx.poolrequests import get_global_proxies
# serve pages with HTTP/1.1
@@ -149,8 +150,6 @@ _category_names = (gettext('files'),
gettext('onions'),
gettext('science'))
-outgoing_proxies = settings['outgoing'].get('proxies') or None
-
_flask_babel_get_translations = flask_babel.get_translations
@@ -905,7 +904,7 @@ def image_proxy():
stream=True,
timeout=settings['outgoing']['request_timeout'],
headers=headers,
- proxies=outgoing_proxies)
+ proxies=get_global_proxies())
if resp.status_code == 304:
return '', resp.status_code
diff --git a/tests/unit/test_poolrequests.py b/tests/unit/test_poolrequests.py
new file mode 100644
index 000000000..b22685fd0
--- /dev/null
+++ b/tests/unit/test_poolrequests.py
@@ -0,0 +1,89 @@
+from unittest.mock import patch
+from requests.models import Response
+
+from searx.testing import SearxTestCase
+
+import searx.poolrequests
+from searx.poolrequests import get_proxy_cycles, get_proxies
+
+
+CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'],
+ 'https': ['http://localhost:9091', 'http://localhost:9093']}
+
+
+class TestProxy(SearxTestCase):
+
+ def test_noconfig(self):
+ cycles = get_proxy_cycles(None)
+ self.assertIsNone(cycles)
+
+ cycles = get_proxy_cycles(False)
+ self.assertIsNone(cycles)
+
+ def test_oldconfig(self):
+ config = {
+ 'http': 'http://localhost:9090',
+ 'https': 'http://localhost:9091',
+ }
+ cycles = get_proxy_cycles(config)
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+ def test_one_proxy(self):
+ config = {
+ 'http': ['http://localhost:9090'],
+ 'https': ['http://localhost:9091'],
+ }
+ cycles = get_proxy_cycles(config)
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+ def test_multiple_proxies(self):
+ cycles = get_proxy_cycles(CONFIG)
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['http']), 'http://localhost:9092')
+ self.assertEqual(next(cycles['http']), 'http://localhost:9090')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9093')
+ self.assertEqual(next(cycles['https']), 'http://localhost:9091')
+
+ def test_getproxies_none(self):
+ self.assertIsNone(get_proxies(None))
+
+ def test_getproxies_config(self):
+ cycles = get_proxy_cycles(CONFIG)
+ self.assertEqual(get_proxies(cycles), {
+ 'http': 'http://localhost:9090',
+ 'https': 'http://localhost:9091'
+ })
+ self.assertEqual(get_proxies(cycles), {
+ 'http': 'http://localhost:9092',
+ 'https': 'http://localhost:9093'
+ })
+
+ @patch('searx.poolrequests.get_global_proxies')
+ def test_request(self, mock_get_global_proxies):
+ method = 'GET'
+ url = 'http://localhost'
+ custom_proxies = {
+ 'https': 'http://localhost:1080'
+ }
+ global_proxies = {
+ 'http': 'http://localhost:9092',
+ 'https': 'http://localhost:9093'
+ }
+ mock_get_global_proxies.return_value = global_proxies
+
+ # check the global proxies usage
+ with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
+ searx.poolrequests.request(method, url)
+ mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies)
+
+ # check if the proxies parameter overrides the global proxies
+ with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
+ searx.poolrequests.request(method, url, proxies=custom_proxies)
+ mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies)