diff options
author | Alexandre Flament <alex@al-f.net> | 2023-01-29 20:48:43 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-29 20:48:43 +0100 |
commit | 9d102fb08ff4a7dc04d093561adde60436e3f69e (patch) | |
tree | 6df4803a3f1ff0433c0f6c3344ea9133704c6f41 | |
parent | 4f808088f146b5f2c21b219d57d7bff885d8b9c2 (diff) | |
parent | 031162be0471650c09c25954b5251d06d8c042e1 (diff) | |
download | searxng-9d102fb08ff4a7dc04d093561adde60436e3f69e.tar.gz searxng-9d102fb08ff4a7dc04d093561adde60436e3f69e.zip |
Merge pull request #2132 from dalf/update_pr_1967
search.suspended_time settings: bug fixes
-rw-r--r-- | docs/admin/engines/settings.rst | 27 | ||||
-rw-r--r-- | docs/src/searx.exceptions.rst | 8 | ||||
-rw-r--r-- | searx/engines/startpage.py | 3 | ||||
-rw-r--r-- | searx/exceptions.py | 51 | ||||
-rw-r--r-- | searx/network/raise_for_httperror.py | 4 | ||||
-rw-r--r-- | searx/settings.yml | 2 |
6 files changed, 64 insertions, 31 deletions
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst index c747e3f43..97f5ef63e 100644 --- a/docs/admin/engines/settings.rst +++ b/docs/admin/engines/settings.rst @@ -110,6 +110,13 @@ Global Settings default_lang: "" ban_time_on_fail: 5 max_ban_time_on_fail: 120 + suspended_times: + SearxEngineAccessDenied: 86400 + SearxEngineCaptcha: 86400 + SearxEngineTooManyRequests: 3600 + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + recaptcha_SearxEngineCaptcha: 604800 formats: - html @@ -159,6 +166,25 @@ Global Settings ``max_ban_time_on_fail``: Max ban time in seconds after engine errors. +``suspended_times``: + Engine suspension time after error (in seconds; set to 0 to disable) + + ``SearxEngineAccessDenied``: 86400 + For error "Access denied" and "HTTP error [402, 403]" + + ``SearxEngineCaptcha``: 86400 + For error "CAPTCHA" + + ``SearxEngineTooManyRequests``: 3600 + For error "Too many request" and "HTTP error 429" + + Cloudflare CAPTCHA: + - ``cf_SearxEngineCaptcha``: 1296000 + - ``cf_SearxEngineAccessDenied``: 86400 + + Google CAPTCHA: + - ``recaptcha_SearxEngineCaptcha``: 604800 + ``formats``: Result formats available from web, remove format to deny access (use lower case). @@ -168,6 +194,7 @@ Global Settings - ``json`` - ``rss`` + .. _settings server: ``server:`` diff --git a/docs/src/searx.exceptions.rst b/docs/src/searx.exceptions.rst new file mode 100644 index 000000000..72117e148 --- /dev/null +++ b/docs/src/searx.exceptions.rst @@ -0,0 +1,8 @@ +.. _searx.exceptions: + +================== +SearXNG Exceptions +================== + +.. 
automodule:: searx.exceptions + :members: diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 24aa59d03..f857f7b6d 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -62,8 +62,7 @@ sc_code = '' def raise_captcha(resp): if str(resp.url).startswith('https://www.startpage.com/sp/captcha'): - # suspend CAPTCHA for 7 days - raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600) + raise SearxEngineCaptchaException() def get_sc_code(headers): diff --git a/searx/exceptions.py b/searx/exceptions.py index af81bfb23..069be9057 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -1,29 +1,19 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. - -(C) 2017- by Alexandre Flament, <alex@al-f.net> -''' - +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Exception types raised by SearXNG modules. 
+""" from typing import Optional, Union class SearxException(Exception): - pass + """Base SearXNG exception.""" class SearxParameterException(SearxException): + """Raised when a query is missing a required parameter""" + def __init__(self, name, value): if value == '' or value is None: message = 'Empty ' + name + ' parameter' @@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException): """The website is blocking the access""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied" - - def __init__(self, suspended_time=None, message='Access denied'): + """This setting contains the default suspended time (default 86400 sec / 1 + day).""" + + def __init__(self, suspended_time: int = None, message: str = 'Access denied'): + """Generic exception to raise when an engine denies access to the results. + + :param suspended_time: How long the engine is going to be suspended in + seconds. Defaults to None. + :type suspended_time: int, None + :param message: Internal message. Defaults to ``Access denied`` + :type message: str + """ suspended_time = suspended_time or self._get_default_suspended_time() super().__init__(message + ', suspended_time=' + str(suspended_time)) self.suspended_time = suspended_time self.message = message def _get_default_suspended_time(self): - from searx import get_setting + from searx import get_setting # pylint: disable=C0415 return get_setting(self.SUSPEND_TIME_SETTING) class SearxEngineCaptchaException(SearxEngineAccessDeniedException): - """The website has returned a CAPTCHA - - By default, searx stops sending requests to this engine for 1 day. 
- """ + """The website has returned a CAPTCHA.""" SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha" + """This setting contains the default suspended time (default 86400 sec / 1 + day).""" def __init__(self, suspended_time=None, message='CAPTCHA'): super().__init__(message=message, suspended_time=suspended_time) @@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException): """ SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests" + """This setting contains the default suspended time (default 3600 sec / 1 + hour).""" def __init__(self, suspended_time=None, message='Too many request'): super().__init__(message=message, suspended_time=suspended_time) diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index 7fc2b7877..9f847d436 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -72,9 +72,7 @@ def raise_for_httperror(resp): if resp.status_code and resp.status_code >= 400: raise_for_captcha(resp) if resp.status_code in (402, 403): - raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code)) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() resp.raise_for_status() diff --git a/searx/settings.yml b/searx/settings.yml index 81025d653..216cb3c82 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -45,7 +45,7 @@ search: ban_time_on_fail: 5 # max ban time in seconds after engine errors max_ban_time_on_fail: 120 - suspend_times: + suspended_times: # Engine suspension time after error (in seconds; set to 0 to disable) # For error "Access denied" and "HTTP error [402, 403]" SearxEngineAccessDenied: 86400 |