diff options
Diffstat (limited to 'searx/search')
-rw-r--r-- | searx/search/__init__.py | 7 | ||||
-rw-r--r-- | searx/search/checker/__main__.py | 27 | ||||
-rw-r--r-- | searx/search/checker/background.py | 17 | ||||
-rw-r--r-- | searx/search/checker/impl.py | 75 | ||||
-rw-r--r-- | searx/search/models.py | 80 | ||||
-rw-r--r-- | searx/search/processors/__init__.py | 1 | ||||
-rw-r--r-- | searx/search/processors/abstract.py | 13 | ||||
-rw-r--r-- | searx/search/processors/offline.py | 2 | ||||
-rw-r--r-- | searx/search/processors/online.py | 45 | ||||
-rw-r--r-- | searx/search/processors/online_currency.py | 4 | ||||
-rw-r--r-- | searx/search/processors/online_dictionary.py | 6 |
11 files changed, 160 insertions, 117 deletions
diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 0a3c5b3ac..d66f3362d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -123,8 +123,11 @@ class Search: # Max & user query: From user query except if above max actual_timeout = min(query_timeout, max_request_timeout) - logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) + logger.debug( + "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( + actual_timeout, default_timeout, query_timeout, max_request_timeout + ) + ) return requests, actual_timeout diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 4ce4ca76b..1311288f3 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -37,12 +37,12 @@ else: stdout = io.TextIOWrapper( # pylint: disable=consider-using-with open(sys.stdout.fileno(), 'wb', 0), - write_through=True + write_through=True, ) stderr = io.TextIOWrapper( # pylint: disable=consider-using-with - open(sys.stderr.fileno(), 'wb', 0) - , write_through=True + open(sys.stderr.fileno(), 'wb', 0), + write_through=True, ) @@ -91,12 +91,21 @@ def run(engine_name_list, verbose): # call by setup.py def main(): parser = argparse.ArgumentParser(description='Check searx engines.') - parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', - help='engines name or shortcut list. Empty for all engines.') - parser.add_argument('--verbose', '-v', - action='store_true', dest='verbose', - help='Display details about the test results', - default=False) + parser.add_argument( + 'engine_name_list', + metavar='engine name', + type=str, + nargs='*', + help='engines name or shortcut list. Empty for all engines.', + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + dest='verbose', + help='Display details about the test results', + default=False, + ) args = parser.parse_args() run(args.engine_name_list, args.verbose) diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index d9f11a71c..ff005dd91 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -23,10 +23,12 @@ running = threading.Lock() def _get_interval(every, error_msg): if isinstance(every, int): every = (every, every) - if not isinstance(every, (tuple, list))\ - or len(every) != 2\ - or not isinstance(every[0], int)\ - or not isinstance(every[1], int): + if ( + not isinstance(every, (tuple, list)) + or len(every) != 2 + or not isinstance(every[0], int) + or not isinstance(every[1], int) + ): raise SearxSettingsException(error_msg, None) return every @@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True): def run(): - if not running.acquire(blocking=False): # pylint: disable=consider-using-with + if not running.acquire(blocking=False): # pylint: disable=consider-using-with return try: logger.info('Starting checker') - result = { - 'status': 'ok', - 'engines': {} - } + result = {'status': 'ok', 'engines': {}} for name, processor in PROCESSORS.items(): logger.debug('Checking %s engine', name) checker = Checker(processor) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index e68248c0e..c0dd966d0 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -74,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool: try: # use "image_proxy" (avoid HTTP/2) network.set_context_network_name('image_proxy') - stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={ - 'User-Agent': gen_useragent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US;q=0.5,en;q=0.3', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-GPC': '1', - 'Cache-Control': 'max-age=0' - }) + stream = network.stream( + 'GET', + image_url, + timeout=10.0, + allow_redirects=True, + headers={ + 'User-Agent': gen_useragent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + }, + ) r = next(stream) r.close() if r.status_code == 200: @@ -104,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool: def _is_url_image(image_url) -> bool: - """Normalize image_url - """ + """Normalize image_url""" if not isinstance(image_url, str): return False @@ -131,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. } -def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ - -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: +def _search_query_diff( + sq1: SearchQuery, sq2: SearchQuery +) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: param1 = _search_query_to_dict(sq1) param2 = _search_query_to_dict(sq2) common = {} @@ -182,11 +188,9 @@ class ResultContainerTests: __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' - def __init__(self, - test_results: TestResults, - test_name: str, - search_query: SearchQuery, - result_container: ResultContainer): + def __init__( + self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer + ): self.test_name = test_name self.search_query = search_query self.result_container = result_container @@ -326,10 +330,9 @@ class CheckerTests: __slots__ = 'test_results', 'test_name', 'result_container_tests_list' - def __init__(self, - test_results: TestResults, - test_name: str, - result_container_tests_list: typing.List[ResultContainerTests]): + def __init__( + self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests] + ): self.test_results = test_results self.test_name = test_name self.result_container_tests_list = result_container_tests_list @@ -342,14 +345,17 @@ class CheckerTests: for i, urls_i in enumerate(urls_list): for j, urls_j in enumerate(urls_list): if i < j and urls_i == urls_j: - common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, - self.result_container_tests_list[j].search_query) + common, diff = _search_query_diff( + self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query, + ) common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) - diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) - diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) - self.test_results.add_error(self.test_name, - 'results are identitical for {} and {} ({})' - .format(diff1_str, diff2_str, common_str)) + diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error( + self.test_name, + 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str), + ) class Checker: @@ -395,9 +401,10 @@ class Checker: elif isinstance(method, types.FunctionType): method(*args) else: - self.test_results.add_error(obj.test_name, - 'method {!r} ({}) not found for {}' - .format(method, method.__class__.__name__, obj.__class__.__name__)) + self.test_results.add_error( + obj.test_name, + 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__), + ) def call_tests(self, obj, test_descriptions): for test_description in test_descriptions: diff --git a/searx/search/models.py b/searx/search/models.py index e48cb3611..ff5897966 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -25,19 +25,30 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang', 'engine_data' - - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[typing.Dict[str, str]]=None): + __slots__ = ( + 'query', + 'engineref_list', + 'lang', + 'safesearch', + 'pageno', + 'time_range', + 'timeout_limit', + 'external_bang', + 'engine_data', + ) + + def __init__( + self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str = 'all', + safesearch: int = 0, + pageno: int = 1, + time_range: typing.Optional[str] = None, + timeout_limit: typing.Optional[float] = None, + external_bang: typing.Optional[str] = None, + engine_data: typing.Optional[typing.Dict[str, str]] = None, + ): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -53,20 +64,39 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + self.query, + self.engineref_list, + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ + return ( + self.query == other.query + and self.engineref_list == other.engineref_list + and self.lang == other.lang + and self.safesearch == other.safesearch + and self.pageno == other.pageno + and self.time_range == other.time_range + and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang + ) def __hash__(self): - return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, - self.timeout_limit, self.external_bang)) + return hash( + ( + self.query, + tuple(self.engineref_list), + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) + ) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 8108f8dfa..966b990ec 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -29,6 +29,7 @@ logger = logger.getChild('search.processors') PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" + def get_processor_class(engine_type): """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index b5fa063fd..732b55d52 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} + class SuspendedStatus: """Class to handle suspend state.""" @@ -39,8 +40,10 @@ class SuspendedStatus: # update continuous_errors / suspend_end_time self.continuous_errors += 1 if suspended_time is None: - suspended_time = min(settings['search']['max_ban_time_on_fail'], - self.continuous_errors * settings['search']['ban_time_on_fail']) + suspended_time = min( + settings['search']['max_ban_time_on_fail'], + self.continuous_errors * settings['search']['ban_time_on_fail'], + ) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason logger.debug('Suspend for %i seconds', suspended_time) @@ -127,9 +130,9 @@ class EngineProcessor(ABC): def extend_container_if_suspended(self, result_container): if self.suspended_status.is_suspended: - result_container.add_unresponsive_engine(self.engine_name, - self.suspended_status.suspend_reason, - suspended=True) + result_container.add_unresponsive_engine( + self.engine_name, self.suspended_status.suspend_reason, suspended=True + ) return True return False diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ec7a4a36e..13f077cb1 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 674ba9c8e..8d8275df1 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -66,10 +66,7 @@ class OnlineProcessor(EngineProcessor): # create dictionary which contain all # informations about the request request_args = dict( - headers=params['headers'], - cookies=params['cookies'], - verify=params['verify'], - auth=params['auth'] + headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth'] ) # max_redirects @@ -105,10 +102,12 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error(self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), - (status_code, reason, hostname), - secondary=True) + count_error( + self.engine_name, + '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + (status_code, reason, hostname), + secondary=True, + ) return response @@ -147,22 +146,16 @@ class OnlineProcessor(EngineProcessor): # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) self.logger.error( - "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e.__class__.__name__ + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e.__class__.__name__ ) ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) self.logger.exception( - "requests exception (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e + "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e ) ) except SearxEngineCaptchaException as e: @@ -188,10 +181,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'paging', False): tests['paging'] = { - 'matrix': {'query': 'time', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if 'general' in self.engine.categories: # avoid documentation about HTML tags (<time> and <input type="time">) @@ -199,10 +191,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'time_range', False): tests['time_range'] = { - 'matrix': {'query': 'news', - 'time_range': (None, 'day')}, + 'matrix': {'query': 'news', 'time_range': (None, 'day')}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if getattr(self.engine, 'supported_languages', []): @@ -216,10 +207,6 @@ class OnlineProcessor(EngineProcessor): } if getattr(self.engine, 'safesearch', False): - tests['safesearch'] = { - 'matrix': {'query': 'porn', - 'safesearch': (0, 2)}, - 'test': ['unique_results'] - } + tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']} return tests diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 4e5c57264..6bd891b1d 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -12,11 +12,13 @@ from .online import OnlineProcessor parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) + def normalize_name(name): name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() + def name_to_iso4217(name): name = normalize_name(name) currency = CURRENCIES['names'].get(name, [name]) @@ -24,9 +26,11 @@ def name_to_iso4217(name): return currency return currency[0] + def iso4217_to_name(iso4217, language): return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) + class OnlineCurrencyProcessor(OnlineProcessor): """Processor class used by ``online_currency`` engines.""" diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 72941d57a..3e7f6ed59 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -11,6 +11,7 @@ from .online import OnlineProcessor parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) + class OnlineDictionaryProcessor(OnlineProcessor): """Processor class used by ``online_dictionary`` engines.""" @@ -44,10 +45,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): if getattr(self.engine, 'paging', False): tests['translation_paging'] = { - 'matrix': {'query': 'en-es house', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty', ('one_title_contains', 'house')], - 'test': ['unique_results'] + 'test': ['unique_results'], } else: tests['translation'] = { |