summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/admin/engines/settings.rst74
-rw-r--r--searx/enginelib/__init__.py14
-rw-r--r--searx/enginelib/traits.py6
-rw-r--r--searx/engines/__init__.py69
-rw-r--r--searx/engines/archlinux.py25
-rw-r--r--searx/engines/bing.py18
-rw-r--r--searx/engines/dailymotion.py20
-rw-r--r--searx/engines/duckduckgo.py24
-rw-r--r--searx/engines/google.py18
-rw-r--r--searx/engines/peertube.py22
-rw-r--r--searx/engines/startpage.py28
-rw-r--r--searx/engines/wikipedia.py6
-rw-r--r--searx/settings_defaults.py2
13 files changed, 204 insertions, 122 deletions
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index 118e01efd..250a27461 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -397,14 +397,26 @@ Communication with search engines.
Global timeout of the requests made to others engines in seconds. A bigger
timeout will allow to wait for answers from slow engines, but in consequence
will slow SearXNG reactivity (the result page may take the time specified in the
- timeout to load). Can be override by :ref:`settings engine`
+ timeout to load). Can be overridden by ``timeout`` in the :ref:`settings engine`.
``useragent_suffix`` :
Suffix to the user-agent SearXNG uses to send requests to others engines. If an
engine wish to block you, a contact info here may be useful to avoid that.
+.. _Pool limit configuration: https://www.python-httpx.org/advanced/#pool-limit-configuration
+
+``pool_maxsize`` :
+ Number of allowable keep-alive connections, or ``null`` to always allow. The
+ default is 10. See ``max_keepalive_connections`` `Pool limit configuration`_.
+
+``pool_connections`` :
+ Maximum number of allowable connections, or ``null`` for no limits. The
+ default is 100. See ``max_connections`` `Pool limit configuration`_.
+
``keepalive_expiry`` :
- Number of seconds to keep a connection in the pool. By default 5.0 seconds.
+ Number of seconds to keep a connection in the pool. By default 5.0 seconds.
+ See ``keepalive_expiry`` `Pool limit configuration`_.
+
.. _httpx proxies: https://www.python-httpx.org/advanced/#http-proxying
@@ -429,15 +441,6 @@ Communication with search engines.
Number of retry in case of an HTTP error. On each retry, SearXNG uses an
different proxy and source ip.
-``retry_on_http_error`` :
- Retry request on some HTTP status code.
-
- Example:
-
- * ``true`` : on HTTP status code between 400 and 599.
- * ``403`` : on HTTP status code 403.
- * ``[403, 429]``: on HTTP status code 403 and 429.
-
``enable_http2`` :
Enable by default. Set to ``false`` to disable HTTP/2.
@@ -455,6 +458,11 @@ Communication with search engines.
``max_redirects`` :
30 by default. Maximum redirect before it is an error.
+``using_tor_proxy`` :
+ Using tor proxy (``true``) or not (``false``) for all engines. The default is
+ ``false`` and can be overwritten in the :ref:`settings engine`.
+
+
.. _settings categories_as_tabs:
@@ -522,13 +530,14 @@ engine is shown. Most of the options have a default value or even are optional.
use_official_api: true
require_api_key: true
results: HTML
- enable_http: false
+
+ # overwrite values from section 'outgoing:'
enable_http2: false
retries: 1
- retry_on_http_error: true # or 403 or [404, 429]
max_connections: 100
max_keepalive_connections: 10
keepalive_expiry: 5.0
+ using_tor_proxy: false
proxies:
http:
- http://proxy1:8080
@@ -539,6 +548,11 @@ engine is shown. Most of the options have a default value or even are optional.
- socks5://user:password@proxy3:1080
- socks5h://user:password@proxy4:1080
+ # other network settings
+ enable_http: false
+ retry_on_http_error: true # or 403 or [404, 429]
+
+
``name`` :
Name that will be used across SearXNG to define this engine. In settings, on
the result page...
@@ -579,7 +593,8 @@ engine is shown. Most of the options have a default value or even are optional.
query all search engines in that category (group).
``timeout`` : optional
- Timeout of the search with the current search engine. **Be careful, it will
+ Timeout of the search with the current search engine. Overwrites
+ ``request_timeout`` from :ref:`settings outgoing`. **Be careful, it will
modify the global timeout of SearXNG.**
``api_key`` : optional
@@ -615,6 +630,37 @@ engine is shown. Most of the options have a default value or even are optional.
- ``ipv4`` set ``local_addresses`` to ``0.0.0.0`` (use only IPv4 local addresses)
- ``ipv6`` set ``local_addresses`` to ``::`` (use only IPv6 local addresses)
+``enable_http`` : optional
+ Enable HTTP for this engine (by default only HTTPS is enabled).
+
+``retry_on_http_error`` : optional
+ Retry request on some HTTP status code.
+
+ Example:
+
+ * ``true`` : on HTTP status code between 400 and 599.
+ * ``403`` : on HTTP status code 403.
+ * ``[403, 429]``: on HTTP status code 403 and 429.
+
+``proxies`` :
+ Overwrites proxy settings from :ref:`settings outgoing`.
+
+``using_tor_proxy`` :
+ Using tor proxy (``true``) or not (``false``) for this engine. The default is
+ taken from ``using_tor_proxy`` of the :ref:`settings outgoing`.
+
+``max_keepalive_connections`` :
+ `Pool limit configuration`_, overwrites value ``pool_maxsize`` from
+ :ref:`settings outgoing` for this engine.
+
+``max_connections`` :
+ `Pool limit configuration`_, overwrites value ``pool_connections`` from
+ :ref:`settings outgoing` for this engine.
+
+``keepalive_expiry`` :
+ `Pool limit configuration`_, overwrites value ``keepalive_expiry`` from
+ :ref:`settings outgoing` for this engine.
+
.. note::
A few more options are possible, but they are pretty specific to some
diff --git a/searx/enginelib/__init__.py b/searx/enginelib/__init__.py
index 00962e215..fd3019e6c 100644
--- a/searx/enginelib/__init__.py
+++ b/searx/enginelib/__init__.py
@@ -17,7 +17,7 @@
from __future__ import annotations
-from typing import Union, Dict, List, Callable, TYPE_CHECKING
+from typing import List, Callable, TYPE_CHECKING
if TYPE_CHECKING:
from searx.enginelib import traits
@@ -134,3 +134,15 @@ class Engine: # pylint: disable=too-few-public-methods
require_api_key: true
results: HTML
"""
+
+ using_tor_proxy: bool
+ """Using tor proxy (``true``) or not (``false``) for this engine."""
+
+ send_accept_language_header: bool
+ """When this option is activated, the language (locale) that is selected by
+ the user is used to build and send a ``Accept-Language`` header in the
+ request to the origin search engine."""
+
+ tokens: List[str]
+ """A list of secret tokens to make this engine *private*, more details see
+ :ref:`private engines`."""
diff --git a/searx/enginelib/traits.py b/searx/enginelib/traits.py
index ae27d46f1..8a7356ce2 100644
--- a/searx/enginelib/traits.py
+++ b/searx/enginelib/traits.py
@@ -13,6 +13,7 @@ used.
from __future__ import annotations
import json
import dataclasses
+import types
from typing import Dict, Iterable, Union, Callable, Optional, TYPE_CHECKING
from typing_extensions import Literal, Self
@@ -82,8 +83,7 @@ class EngineTraits:
"""
custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
- """A place to store engine's custom traits, not related to the SearXNG core
-
+ """A place to store engine's custom traits, not related to the SearXNG core.
"""
def get_language(self, searxng_locale: str, default=None):
@@ -228,7 +228,7 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
return obj
- def set_traits(self, engine: Engine):
+ def set_traits(self, engine: Engine | types.ModuleType):
"""Set traits in a :py:obj:`Engine` namespace.
:param engine: engine instance build by :py:func:`searx.engines.load_engine`
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index a2db26816..e9e9f87c9 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -17,7 +17,9 @@ import sys
import copy
from os.path import realpath, dirname
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict
+import types
+import inspect
from searx import logger, settings
from searx.utils import load_module
@@ -28,21 +30,23 @@ if TYPE_CHECKING:
logger = logger.getChild('engines')
ENGINE_DIR = dirname(realpath(__file__))
ENGINE_DEFAULT_ARGS = {
+ # Common options in the engine module
"engine_type": "online",
- "inactive": False,
- "disabled": False,
- "timeout": settings["outgoing"]["request_timeout"],
- "shortcut": "-",
- "categories": ["general"],
"paging": False,
- "safesearch": False,
"time_range_support": False,
+ "safesearch": False,
+ # settings.yml
+ "categories": ["general"],
"enable_http": False,
- "using_tor_proxy": False,
+ "shortcut": "-",
+ "timeout": settings["outgoing"]["request_timeout"],
"display_error_messages": True,
+ "disabled": False,
+ "inactive": False,
+ "about": {},
+ "using_tor_proxy": False,
"send_accept_language_header": False,
"tokens": [],
- "about": {},
}
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'
@@ -51,7 +55,7 @@ DEFAULT_CATEGORY = 'other'
# Defaults for the namespace of an engine module, see :py:func:`load_engine`
categories = {'general': []}
-engines: Dict[str, Engine] = {}
+engines: Dict[str, Engine | types.ModuleType] = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).
@@ -63,7 +67,19 @@ engine_shortcuts = {}
"""
-def load_engine(engine_data: dict) -> Optional[Engine]:
+def check_engine_module(module: types.ModuleType):
+ # probe unintentional name collisions / for example name collisions caused
+ # by import statements in the engine module ..
+
+ # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
+ obj = getattr(module, 'network', None)
+ if obj and inspect.ismodule(obj):
+ msg = f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string'
+ # logger.error(msg)
+ raise TypeError(msg)
+
+
+def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
"""Load engine from ``engine_data``.
:param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
@@ -100,19 +116,20 @@ def load_engine(engine_data: dict) -> Optional[Engine]:
engine_data['name'] = engine_name
# load_module
- engine_module = engine_data.get('engine')
- if engine_module is None:
+ module_name = engine_data.get('engine')
+ if module_name is None:
logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
return None
try:
- engine = load_module(engine_module + '.py', ENGINE_DIR)
+ engine = load_module(module_name + '.py', ENGINE_DIR)
except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
- logger.exception('Fatal exception in engine "{}"'.format(engine_module))
+ logger.exception('Fatal exception in engine "{}"'.format(module_name))
sys.exit(1)
except BaseException:
- logger.exception('Cannot load engine "{}"'.format(engine_module))
+ logger.exception('Cannot load engine "{}"'.format(module_name))
return None
+ check_engine_module(engine)
update_engine_attributes(engine, engine_data)
update_attributes_for_tor(engine)
@@ -153,18 +170,18 @@ def set_loggers(engine, engine_name):
and not hasattr(module, "logger")
):
module_engine_name = module_name.split(".")[-1]
- module.logger = logger.getChild(module_engine_name)
+ module.logger = logger.getChild(module_engine_name) # type: ignore
-def update_engine_attributes(engine: Engine, engine_data):
+def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
# set engine attributes from engine_data
for param_name, param_value in engine_data.items():
if param_name == 'categories':
if isinstance(param_value, str):
param_value = list(map(str.strip, param_value.split(',')))
- engine.categories = param_value
+ engine.categories = param_value # type: ignore
elif hasattr(engine, 'about') and param_name == 'about':
- engine.about = {**engine.about, **engine_data['about']}
+ engine.about = {**engine.about, **engine_data['about']} # type: ignore
else:
setattr(engine, param_name, param_value)
@@ -174,10 +191,10 @@ def update_engine_attributes(engine: Engine, engine_data):
setattr(engine, arg_name, copy.deepcopy(arg_value))
-def update_attributes_for_tor(engine: Engine) -> bool:
+def update_attributes_for_tor(engine: Engine | types.ModuleType):
if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
- engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')
- engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)
+ engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') # type: ignore
+ engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) # type: ignore
def is_missing_required_attributes(engine):
@@ -193,12 +210,12 @@ def is_missing_required_attributes(engine):
return missing
-def using_tor_proxy(engine: Engine):
+def using_tor_proxy(engine: Engine | types.ModuleType):
"""Return True if the engine configuration declares to use Tor."""
return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)
-def is_engine_active(engine: Engine):
+def is_engine_active(engine: Engine | types.ModuleType):
# check if engine is inactive
if engine.inactive is True:
return False
@@ -210,7 +227,7 @@ def is_engine_active(engine: Engine):
return True
-def register_engine(engine: Engine):
+def register_engine(engine: Engine | types.ModuleType):
if engine.name in engines:
logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
sys.exit(1)
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 56c3b447f..17bb1b6c5 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -14,7 +14,6 @@ from urllib.parse import urlencode, urljoin, urlparse
import lxml
import babel
-from searx import network
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag
@@ -45,13 +44,13 @@ main_wiki = 'wiki.archlinux.org'
def request(query, params):
sxng_lang = params['searxng_locale'].split('-')[0]
- netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
- title = traits.custom['title'].get(sxng_lang, 'Special:Search')
+ netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) # type: ignore
+ title: str = traits.custom['title'].get(sxng_lang, 'Special:Search') # type: ignore
base_url = 'https://' + netloc + '/index.php?'
offset = (params['pageno'] - 1) * 20
if netloc == main_wiki:
- eng_lang: str = traits.get_language(sxng_lang, 'English')
+ eng_lang: str = traits.get_language(sxng_lang, 'English') # type: ignore
query += ' (' + eng_lang + ')'
elif netloc == 'wiki.archlinuxcn.org':
base_url = 'https://' + netloc + '/wzh/index.php?'
@@ -71,11 +70,11 @@ def request(query, params):
def response(resp):
results = []
- dom = lxml.html.fromstring(resp.text)
+ dom = lxml.html.fromstring(resp.text) # type: ignore
# get the base URL for the language in which request was made
sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
- netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)
+ netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) # type: ignore
base_url = 'https://' + netloc + '/index.php?'
for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
@@ -83,7 +82,7 @@ def response(resp):
content = extract_text(result.xpath('.//div[@class="searchresult"]'))
results.append(
{
- 'url': urljoin(base_url, link.get('href')),
+ 'url': urljoin(base_url, link.get('href')), # type: ignore
'title': extract_text(link),
'content': content,
}
@@ -114,6 +113,8 @@ def fetch_traits(engine_traits: EngineTraits):
},
"""
+ # pylint: disable=import-outside-toplevel
+ from searx.network import get # see https://github.com/searxng/searxng/issues/762
engine_traits.custom['wiki_netloc'] = {}
engine_traits.custom['title'] = {}
@@ -125,11 +126,11 @@ def fetch_traits(engine_traits: EngineTraits):
'zh': 'Special:搜索',
}
- resp = network.get('https://wiki.archlinux.org/')
- if not resp.ok:
+ resp = get('https://wiki.archlinux.org/')
+ if not resp.ok: # type: ignore
print("ERROR: response from wiki.archlinix.org is not OK.")
- dom = lxml.html.fromstring(resp.text)
+ dom = lxml.html.fromstring(resp.text) # type: ignore
for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):
sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
@@ -143,9 +144,9 @@ def fetch_traits(engine_traits: EngineTraits):
print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
continue
engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
- engine_traits.custom['title'][sxng_tag] = title
+ engine_traits.custom['title'][sxng_tag] = title # type: ignore
eng_tag = extract_text(eval_xpath_list(a, ".//span"))
- engine_traits.languages[sxng_tag] = eng_tag
+ engine_traits.languages[sxng_tag] = eng_tag # type: ignore
engine_traits.languages['en'] = 'English'
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 81a0cf6a5..3cd707870 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -38,7 +38,6 @@ import babel
import babel.languages
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
-from searx import network
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits
@@ -180,6 +179,10 @@ def request(query, params):
def response(resp):
+ # pylint: disable=too-many-locals,import-outside-toplevel
+
+ from searx.network import Request, multi_requests # see https://github.com/searxng/searxng/issues/762
+
results = []
result_len = 0
@@ -231,9 +234,9 @@ def response(resp):
# resolve all Bing redirections in parallel
request_list = [
- network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
+ Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
]
- response_list = network.multi_requests(request_list)
+ response_list = multi_requests(request_list)
for i, redirect_response in enumerate(response_list):
if not isinstance(redirect_response, Exception):
results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
@@ -272,16 +275,19 @@ def fetch_traits(engine_traits: EngineTraits):
def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
+ # pylint: disable=too-many-locals,import-outside-toplevel
+
+ from searx.network import get # see https://github.com/searxng/searxng/issues/762
# insert alias to map from a language (zh) to a language + script (zh_Hans)
engine_traits.languages['zh'] = 'zh-hans'
- resp = network.get(url)
+ resp = get(url)
- if not resp.ok:
+ if not resp.ok: # type: ignore
print("ERROR: response from peertube is not OK.")
- dom = html.fromstring(resp.text)
+ dom = html.fromstring(resp.text) # type: ignore
map_lang = {'jp': 'ja'}
for td in eval_xpath(dom, xpath_language_codes):
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index d734ec3c8..99da9616c 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -18,9 +18,9 @@ from urllib.parse import urlencode
import time
import babel
-from searx.exceptions import SearxEngineAPIException
-from searx import network
+from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762
from searx.utils import html_to_text
+from searx.exceptions import SearxEngineAPIException
from searx.locales import region_tag, language_tag
from searx.enginelib.traits import EngineTraits
@@ -106,7 +106,7 @@ def request(query, params):
if not query:
return False
- eng_region = traits.get_region(params['searxng_locale'], 'en_US')
+ eng_region: str = traits.get_region(params['searxng_locale'], 'en_US') # type: ignore
eng_lang = traits.get_language(params['searxng_locale'], 'en')
args = {
@@ -156,7 +156,7 @@ def response(resp):
if 'error' in search_res:
raise SearxEngineAPIException(search_res['error'].get('message'))
- network.raise_for_httperror(resp)
+ raise_for_httperror(resp)
# parse results
for res in search_res.get('list', []):
@@ -218,11 +218,11 @@ def fetch_traits(engine_traits: EngineTraits):
"""
- resp = network.get('https://api.dailymotion.com/locales')
- if not resp.ok:
+ resp = get('https://api.dailymotion.com/locales')
+ if not resp.ok: # type: ignore
print("ERROR: response from dailymotion/locales is not OK.")
- for item in resp.json()['list']:
+ for item in resp.json()['list']: # type: ignore
eng_tag = item['locale']
if eng_tag in ('en_EN', 'ar_AA'):
continue
@@ -241,11 +241,11 @@ def fetch_traits(engine_traits: EngineTraits):
locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
- resp = network.get('https://api.dailymotion.com/languages')
- if not resp.ok:
+ resp = get('https://api.dailymotion.com/languages')
+ if not resp.ok: # type: ignore
print("ERROR: response from dailymotion/languages is not OK.")
- for item in resp.json()['list']:
+ for item in resp.json()['list']: # type: ignore
eng_tag = item['code']
if eng_tag in locale_lang_list:
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index d37e28c2d..8349ad8e3 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -13,17 +13,17 @@ import babel
import lxml.html
from searx import (
- network,
locales,
redislib,
external_bang,
)
-from searx import redisdb
from searx.utils import (
eval_xpath,
eval_xpath_getindex,
extract_text,
)
+from searx.network import get # see https://github.com/searxng/searxng/issues/762
+from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
@@ -95,8 +95,8 @@ def get_vqd(query, headers):
return value
query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query}))
- res = network.get(query_url, headers=headers)
- content = res.text
+ res = get(query_url, headers=headers)
+ content = res.text # type: ignore
if content.find('vqd=\"') == -1:
raise SearxEngineAPIException('Request failed')
value = content[content.find('vqd=\"') + 5 :]
@@ -139,7 +139,9 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
params['cookies']['kl'] = eng_region # 'ar-es'
"""
- return eng_traits.custom['lang_region'].get(sxng_locale, eng_traits.get_language(sxng_locale, default))
+ return eng_traits.custom['lang_region'].get( # type: ignore
+ sxng_locale, eng_traits.get_language(sxng_locale, default)
+ )
ddg_reg_map = {
@@ -358,13 +360,13 @@ def fetch_traits(engine_traits: EngineTraits):
engine_traits.all_locale = 'wt-wt'
# updated from u588 to u661 / should be updated automatically?
- resp = network.get('https://duckduckgo.com/util/u661.js')
+ resp = get('https://duckduckgo.com/util/u661.js')
- if not resp.ok:
+ if not resp.ok: # type: ignore
print("ERROR: response from DuckDuckGo is not OK.")
- pos = resp.text.find('regions:{') + 8
- js_code = resp.text[pos:]
+ pos = resp.text.find('regions:{') + 8 # type: ignore
+ js_code = resp.text[pos:] # type: ignore
pos = js_code.find('}') + 1
regions = json.loads(js_code[:pos])
@@ -399,8 +401,8 @@ def fetch_traits(engine_traits: EngineTraits):
engine_traits.custom['lang_region'] = {}
- pos = resp.text.find('languages:{') + 10
- js_code = resp.text[pos:]
+ pos = resp.text.find('languages:{') + 10 # type: ignore
+ js_code = resp.text[pos:] # type: ignore
pos = js_code.find('}') + 1
js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
languages = json.loads(js_code)
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 708068f3a..6aaac2f22 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -23,7 +23,7 @@ import babel.languages
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag, get_offical_locales
-from searx import network
+from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.exceptions import SearxEngineCaptchaException
from searx.enginelib.traits import EngineTraits
@@ -419,11 +419,11 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
engine_traits.custom['supported_domains'] = {}
- resp = network.get('https://www.google.com/preferences')
- if not resp.ok:
+ resp = get('https://www.google.com/preferences')
+ if not resp.ok: # type: ignore
raise RuntimeError("Response from Google's preferences is not OK.")
- dom = html.fromstring(resp.text)
+ dom = html.fromstring(resp.text) # type: ignore
# supported language codes
@@ -474,18 +474,18 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
# supported domains
if add_domains:
- resp = network.get('https://www.google.com/supported_domains')
- if not resp.ok:
+ resp = get('https://www.google.com/supported_domains')
+ if not resp.ok: # type: ignore
raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
- for domain in resp.text.split():
+ for domain in resp.text.split(): # type: ignore
domain = domain.strip()
if not domain or domain in [
'.google.com',
]:
continue
region = domain.split('.')[-1].upper()
- engine_traits.custom['supported_domains'][region] = 'www' + domain
+ engine_traits.custom['supported_domains'][region] = 'www' + domain # type: ignore
if region == 'HK':
# There is no google.cn, we use .com.hk for zh-CN
- engine_traits.custom['supported_domains']['CN'] = 'www' + domain
+ engine_traits.custom['supported_domains']['CN'] = 'www' + domain # type: ignore
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index 87b386d7a..d0eba6b88 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -13,7 +13,7 @@ from dateutil.relativedelta import relativedelta
import babel
-from searx import network
+from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.locales import language_tag
from searx.utils import html_to_text
from searx.enginelib.traits import EngineTraits
@@ -147,32 +147,30 @@ def fetch_traits(engine_traits: EngineTraits):
https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
"""
- resp = network.get(
+ resp = get(
'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue',
# the response from search-index repository is very slow
timeout=60,
)
- if not resp.ok:
+ if not resp.ok: # type: ignore
print("ERROR: response from peertube is not OK.")
return
- js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
+ js_lang = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL) # type: ignore
if not js_lang:
print("ERROR: can't determine languages from peertube")
return
for lang in re.finditer(r"\{ id: '([a-z]+)', label:", js_lang.group(1)):
+ eng_tag = lang.group(1)
+ if eng_tag == 'oc':
+ # Occitanis not known by babel, its closest relative is Catalan
+ # but 'ca' is already in the list of engine_traits.languages -->
+ # 'oc' will be ignored.
+ continue
try:
- eng_tag = lang.group(1)
- if eng_tag == 'oc':
- # Occitanis not known by babel, its closest relative is Catalan
- # but 'ca' is already in the list of engine_traits.languages -->
- # 'oc' will be ignored.
- continue
-
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
-
except babel.UnknownLocaleError:
print("ERROR: %s is unknown by babel" % eng_tag)
continue
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 2813d0bf3..92d69867a 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -91,8 +91,8 @@ import dateutil.parser
import lxml.html
import babel
-from searx import network
from searx.utils import extract_text, eval_xpath, gen_useragent
+from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx.exceptions import SearxEngineCaptchaException
from searx.locales import region_tag
from searx.enginelib.traits import EngineTraits
@@ -211,25 +211,25 @@ def get_sc_code(searxng_locale, params):
get_sc_url = base_url + '/?sc=%s' % (sc_code)
logger.debug("query new sc time-stamp ... %s", get_sc_url)
logger.debug("headers: %s", headers)
- resp = network.get(get_sc_url, headers=headers)
+ resp = get(get_sc_url, headers=headers)
# ?? x = network.get('https://www.startpage.com/sp/cdn/images/filter-chevron.svg', headers=headers)
# ?? https://www.startpage.com/sp/cdn/images/filter-chevron.svg
# ?? ping-back URL: https://www.startpage.com/sp/pb?sc=TLsB0oITjZ8F21
- if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
+ if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): # type: ignore
raise SearxEngineCaptchaException(
message="get_sc_code: got redirected to https://www.startpage.com/sp/captcha",
)
- dom = lxml.html.fromstring(resp.text)
+ dom = lxml.html.fromstring(resp.text) # type: ignore
try:
sc_code = eval_xpath(dom, search_form_xpath + '//input[@name="sc"]/@value')[0]
except IndexError as exc:
logger.debug("suspend startpage API --> https://github.com/searxng/searxng/pull/695")
raise SearxEngineCaptchaException(
- message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url,
+ message="get_sc_code: [PR-695] query new sc time-stamp failed! (%s)" % resp.url, # type: ignore
) from exc
sc_code_ts = time()
@@ -350,7 +350,7 @@ def _response_cat_web(dom):
title = extract_text(link)
if eval_xpath(result, content_xpath):
- content = extract_text(eval_xpath(result, content_xpath))
+ content: str = extract_text(eval_xpath(result, content_xpath)) # type: ignore
else:
content = ''
@@ -374,7 +374,7 @@ def _response_cat_web(dom):
date_string = content[0 : date_pos - 5]
# calculate datetime
- published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group()))
+ published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) # type: ignore
# fix content string
content = content[date_pos:]
@@ -399,12 +399,12 @@ def fetch_traits(engine_traits: EngineTraits):
'User-Agent': gen_useragent(),
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
- resp = network.get('https://www.startpage.com/do/settings', headers=headers)
+ resp = get('https://www.startpage.com/do/settings', headers=headers)
- if not resp.ok:
+ if not resp.ok: # type: ignore
print("ERROR: response from Startpage is not OK.")
- dom = lxml.html.fromstring(resp.text)
+ dom = lxml.html.fromstring(resp.text) # type: ignore
# regions
@@ -443,8 +443,10 @@ def fetch_traits(engine_traits: EngineTraits):
# get the native name of every language known by babel
- for lang_code in filter(lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers()):
- native_name = babel.Locale(lang_code).get_language_name().lower()
+ for lang_code in filter(
+ lambda lang_code: lang_code.find('_') == -1, babel.localedata.locale_identifiers() # type: ignore
+ ):
+ native_name = babel.Locale(lang_code).get_language_name().lower() # type: ignore
# add native name exactly as it is
catalog_engine2code[native_name] = lang_code
@@ -478,7 +480,7 @@ def fetch_traits(engine_traits: EngineTraits):
eng_tag = option.get('value')
if eng_tag in skip_eng_tags:
continue
- name = extract_text(option).lower()
+ name = extract_text(option).lower() # type: ignore
sxng_tag = catalog_engine2code.get(eng_tag)
if sxng_tag is None:
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 98b3d6f9e..b4b70208d 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -61,7 +61,7 @@ import babel
from lxml import html
from searx import utils
-from searx import network
+from searx import network as _network
from searx import locales
from searx.enginelib.traits import EngineTraits
@@ -180,7 +180,7 @@ def response(resp):
):
return []
- network.raise_for_httperror(resp)
+ _network.raise_for_httperror(resp)
api_result = resp.json()
title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
@@ -267,7 +267,7 @@ def fetch_wikimedia_traits(engine_traits: EngineTraits):
for sxng_tag in sxng_tag_list:
engine_traits.regions[sxng_tag] = eng_tag
- resp = network.get(list_of_wikipedias)
+ resp = _network.get(list_of_wikipedias)
if not resp.ok:
print("ERROR: response from Wikipedia is not OK.")
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index 7f657aa54..5d978d0e0 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -209,9 +209,7 @@ SCHEMA = {
'enable_http2': SettingsValue(bool, True),
'verify': SettingsValue((bool, str), True),
'max_request_timeout': SettingsValue((None, numbers.Real), None),
- # Magic number kept from previous code
'pool_connections': SettingsValue(int, 100),
- # Picked from constructor
'pool_maxsize': SettingsValue(int, 10),
'keepalive_expiry': SettingsValue(numbers.Real, 5.0),
# default maximum redirect