summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexandre Flament <alex@al-f.net>2023-01-29 20:48:43 +0100
committerGitHub <noreply@github.com>2023-01-29 20:48:43 +0100
commit9d102fb08ff4a7dc04d093561adde60436e3f69e (patch)
tree6df4803a3f1ff0433c0f6c3344ea9133704c6f41
parent4f808088f146b5f2c21b219d57d7bff885d8b9c2 (diff)
parent031162be0471650c09c25954b5251d06d8c042e1 (diff)
downloadsearxng-9d102fb08ff4a7dc04d093561adde60436e3f69e.tar.gz
searxng-9d102fb08ff4a7dc04d093561adde60436e3f69e.zip
Merge pull request #2132 from dalf/update_pr_1967
search.suspended_time settings: bug fixes
-rw-r--r--docs/admin/engines/settings.rst27
-rw-r--r--docs/src/searx.exceptions.rst8
-rw-r--r--searx/engines/startpage.py3
-rw-r--r--searx/exceptions.py51
-rw-r--r--searx/network/raise_for_httperror.py4
-rw-r--r--searx/settings.yml2
6 files changed, 64 insertions, 31 deletions
diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index c747e3f43..97f5ef63e 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -110,6 +110,13 @@ Global Settings
default_lang: ""
ban_time_on_fail: 5
max_ban_time_on_fail: 120
+ suspended_times:
+ SearxEngineAccessDenied: 86400
+ SearxEngineCaptcha: 86400
+ SearxEngineTooManyRequests: 3600
+ cf_SearxEngineCaptcha: 1296000
+ cf_SearxEngineAccessDenied: 86400
+ recaptcha_SearxEngineCaptcha: 604800
formats:
- html
@@ -159,6 +166,25 @@ Global Settings
``max_ban_time_on_fail``:
Max ban time in seconds after engine errors.
+``suspended_times``:
+ Engine suspension time after error (in seconds; set to 0 to disable)
+
+ ``SearxEngineAccessDenied``: 86400
+ For error "Access denied" and "HTTP error [402, 403]"
+
+ ``SearxEngineCaptcha``: 86400
+ For error "CAPTCHA"
+
+ ``SearxEngineTooManyRequests``: 3600
+ For error "Too many request" and "HTTP error 429"
+
+ Cloudflare CAPTCHA:
+ - ``cf_SearxEngineCaptcha``: 1296000
+ - ``cf_SearxEngineAccessDenied``: 86400
+
+ Google CAPTCHA:
+ - ``recaptcha_SearxEngineCaptcha``: 604800
+
``formats``:
Result formats available from web, remove format to deny access (use lower
case).
@@ -168,6 +194,7 @@ Global Settings
- ``json``
- ``rss``
+
.. _settings server:
``server:``
diff --git a/docs/src/searx.exceptions.rst b/docs/src/searx.exceptions.rst
new file mode 100644
index 000000000..72117e148
--- /dev/null
+++ b/docs/src/searx.exceptions.rst
@@ -0,0 +1,8 @@
+.. _searx.exceptions:
+
+==================
+SearXNG Exceptions
+==================
+
+.. automodule:: searx.exceptions
+ :members:
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 24aa59d03..f857f7b6d 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -62,8 +62,7 @@ sc_code = ''
def raise_captcha(resp):
if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
- # suspend CAPTCHA for 7 days
- raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
+ raise SearxEngineCaptchaException()
def get_sc_code(headers):
diff --git a/searx/exceptions.py b/searx/exceptions.py
index af81bfb23..069be9057 100644
--- a/searx/exceptions.py
+++ b/searx/exceptions.py
@@ -1,29 +1,19 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2017- by Alexandre Flament, <alex@al-f.net>
-'''
-
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Exception types raised by SearXNG modules.
+"""
from typing import Optional, Union
class SearxException(Exception):
- pass
+ """Base SearXNG exception."""
class SearxParameterException(SearxException):
+ """Raised when a query is missing a required parameter"""
+
def __init__(self, name, value):
if value == '' or value is None:
message = 'Empty ' + name + ' parameter'
@@ -70,26 +60,35 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
"""The website is blocking the access"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineAccessDenied"
-
- def __init__(self, suspended_time=None, message='Access denied'):
+ """This setting contains the default suspended time (default 86400 sec / 1
+ day)."""
+
+ def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
+ """Generic exception to raise when an engine denies access to the results.
+
+ :param suspended_time: How long the engine is going to be suspended, in
+ seconds. Defaults to None.
+ :type suspended_time: int, None
+ :param message: Internal message. Defaults to ``Access denied``
+ :type message: str
+ """
suspended_time = suspended_time or self._get_default_suspended_time()
super().__init__(message + ', suspended_time=' + str(suspended_time))
self.suspended_time = suspended_time
self.message = message
def _get_default_suspended_time(self):
- from searx import get_setting
+ from searx import get_setting # pylint: disable=C0415
return get_setting(self.SUSPEND_TIME_SETTING)
class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
- """The website has returned a CAPTCHA
-
- By default, searx stops sending requests to this engine for 1 day.
- """
+ """The website has returned a CAPTCHA."""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineCaptcha"
+ """This setting contains the default suspended time (default 86400 sec / 1
+ day)."""
def __init__(self, suspended_time=None, message='CAPTCHA'):
super().__init__(message=message, suspended_time=suspended_time)
@@ -102,6 +101,8 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
"""
SUSPEND_TIME_SETTING = "search.suspended_times.SearxEngineTooManyRequests"
+ """This setting contains the default suspended time (default 3600 sec / 1
+ hour)."""
def __init__(self, suspended_time=None, message='Too many request'):
super().__init__(message=message, suspended_time=suspended_time)
diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py
index 7fc2b7877..9f847d436 100644
--- a/searx/network/raise_for_httperror.py
+++ b/searx/network/raise_for_httperror.py
@@ -72,9 +72,7 @@ def raise_for_httperror(resp):
if resp.status_code and resp.status_code >= 400:
raise_for_captcha(resp)
if resp.status_code in (402, 403):
- raise SearxEngineAccessDeniedException(
- message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24
- )
+ raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code))
if resp.status_code == 429:
raise SearxEngineTooManyRequestsException()
resp.raise_for_status()
diff --git a/searx/settings.yml b/searx/settings.yml
index 81025d653..216cb3c82 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -45,7 +45,7 @@ search:
ban_time_on_fail: 5
# max ban time in seconds after engine errors
max_ban_time_on_fail: 120
- suspend_times:
+ suspended_times:
# Engine suspension time after error (in seconds; set to 0 to disable)
# For error "Access denied" and "HTTP error [402, 403]"
SearxEngineAccessDenied: 86400