| author    | Markus Heiser <markus.heiser@darmarit.de> | 2021-12-27 09:26:22 +0100 |
|-----------|-------------------------------------------|---------------------------|
| committer | Markus Heiser <markus.heiser@darmarit.de> | 2021-12-27 09:26:22 +0100 |
| commit    | 3d96a9839a12649874b6d4cf9466bd3616b0a03c  |                           |
| tree      | e7d54d1e345b1e792d538ddc250f4827bb2fd9b9  |                           |
| parent    | fcdc2c2cd26e24c2aa3f064d93cee3e29dc2a30c  |                           |
[format.python] initial formatting of the python code
This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
184 files changed, 2808 insertions, 2844 deletions
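The conventions visible in the hunks below (single-quoted strings left untouched, calls collapsed onto one line well beyond 79 characters, trailing commas added wherever a literal is exploded across lines) can be reproduced with black's Python API. The `line_length=120` and `string_normalization=False` values in the sketch below are assumptions inferred from the diff rather than read from the repository's configuration; the authoritative way to apply the project's settings remains the `make format.python` target named in the commit message.

```python
# Sketch only: approximate the formatting applied in this commit via black's
# Python API.  line_length=120 and string_normalization=False are inferred
# from the hunks below and may not match the repository's real configuration.
import black

source = (
    "random_types = {'string': random_string,\n"
    "                'int': random_int,\n"
    "                'float': random_float}\n"
)

mode = black.Mode(line_length=120, string_normalization=False)

# With these settings the dict fits on one line, so black collapses it and
# keeps the single quotes, matching the style of the hunks in this commit.
print(black.format_str(source, mode=mode))
```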
diff --git a/searx/__init__.py b/searx/__init__.py index b1626ae9f..d2d389ea9 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -29,6 +29,7 @@ if settings is not None: _unset = object() + def get_setting(name, default=_unset): """Returns the value to which ``name`` point. If there is no such name in the settings and the ``default`` is unset, a :py:obj:`KeyError` is raised. @@ -80,14 +81,9 @@ def logging_config_debug(): 'levelname': {'color': 8}, 'name': {'color': 8}, 'programname': {'color': 'cyan'}, - 'username': {'color': 'yellow'} + 'username': {'color': 'yellow'}, } - coloredlogs.install( - level=log_level, - level_styles=level_styles, - field_styles=field_styles, - fmt=LOG_FORMAT_DEBUG - ) + coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG) else: logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index d5223e517..e6c383330 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -8,13 +8,12 @@ from flask_babel import gettext # specifies which search query keywords triggers this answerer keywords = ('random',) -random_int_max = 2**31 +random_int_max = 2 ** 31 random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_characters(): - return [random.choice(random_string_letters) - for _ in range(random.randint(8, 32))] + return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))] def random_string(): @@ -39,11 +38,13 @@ def random_uuid(): return str(uuid.uuid4()) -random_types = {'string': random_string, - 'int': random_int, - 'float': random_float, - 'sha256': random_sha256, - 'uuid': random_uuid} +random_types = { + 'string': random_string, + 'int': random_int, + 'float': random_float, + 'sha256': random_sha256, + 'uuid': random_uuid, +} # required answerer function @@ -62,6 +63,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Random value generator'), - 'description': gettext('Generate different random values'), - 'examples': ['random {}'.format(x) for x in random_types]} + return { + 'name': gettext('Random value generator'), + 'description': gettext('Generate different random values'), + 'examples': ['random {}'.format(x) for x in random_types], + } diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index abd4be7f5..60f0d304f 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -4,11 +4,7 @@ from operator import mul from flask_babel import gettext -keywords = ('min', - 'max', - 'avg', - 'sum', - 'prod') +keywords = ('min', 'max', 'avg', 'sum', 'prod') # required answerer function @@ -47,6 +43,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Statistics functions'), - 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), - 'examples': ['avg 123 548 2.04 24.2']} + return { + 'name': gettext('Statistics functions'), + 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), + 'examples': ['avg 123 548 2.04 24.2'], + } diff --git a/searx/autocomplete.py b/searx/autocomplete.py index a55377cd9..b8d272c32 100644 --- a/searx/autocomplete.py +++ 
b/searx/autocomplete.py @@ -120,14 +120,15 @@ def wikipedia(query, lang): return [] -backends = {'dbpedia': dbpedia, - 'duckduckgo': duckduckgo, - 'google': google, - 'startpage': startpage, - 'swisscows': swisscows, - 'qwant': qwant, - 'wikipedia': wikipedia - } +backends = { + 'dbpedia': dbpedia, + 'duckduckgo': duckduckgo, + 'google': google, + 'startpage': startpage, + 'swisscows': swisscows, + 'qwant': qwant, + 'wikipedia': wikipedia, +} def search_autocomplete(backend_name, query, lang): diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 5937ea557..87bfb5477 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -23,10 +23,12 @@ from pathlib import Path data_dir = Path(__file__).parent + def _load(filename): with open(data_dir / filename, encoding='utf-8') as f: return json.load(f) + def ahmia_blacklist_loader(): """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion names. The MD5 values are fetched by:: @@ -39,6 +41,7 @@ def ahmia_blacklist_loader(): with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as f: return f.read().split() + ENGINES_LANGUAGES = _load('engines_languages.json') CURRENCIES = _load('currencies.json') USER_AGENTS = _load('useragents.json') diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index e6a243596..730a4c445 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -43,11 +43,15 @@ def response(resp): filesize, filesize_multiplier = filesize_info.split() filesize = get_torrent_size(filesize, filesize_multiplier) - results.append({'url': href, - 'title': title, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 44ea9a4bd..fa9749e9d 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -57,6 +57,7 @@ engine_shortcuts = {} """ + def load_engine(engine_data): """Load engine from ``engine_data``. 
@@ -166,20 +167,19 @@ def set_language_attributes(engine): # settings.yml if engine.language not in engine.supported_languages: raise ValueError( - "settings.yml - engine: '%s' / language: '%s' not supported" % ( - engine.name, engine.language )) + "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) + ) if isinstance(engine.supported_languages, dict): - engine.supported_languages = { - engine.language : engine.supported_languages[engine.language] - } + engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} else: engine.supported_languages = [engine.language] # find custom aliases for non standard language codes for engine_lang in engine.supported_languages: iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if (iso_lang + if ( + iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and iso_lang not in engine.supported_languages @@ -197,14 +197,12 @@ def set_language_attributes(engine): } engine.fetch_supported_languages = ( # pylint: disable=protected-access - lambda: engine._fetch_supported_languages( - get(engine.supported_languages_url, headers=headers)) + lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) ) def update_attributes_for_tor(engine): - if (settings['outgoing'].get('using_tor_proxy') - and hasattr(engine, 'onion_url') ): + if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) @@ -217,9 +215,7 @@ def is_missing_required_attributes(engine): missing = False for engine_attr in dir(engine): if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None: - logger.error( - 'Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) + logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr)) missing = True return missing @@ -230,8 +226,7 @@ def is_engine_active(engine): return False # exclude onion engines if not using tor - if ('onions' in engine.categories - and not settings['outgoing'].get('using_tor_proxy') ): + if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'): return False return True @@ -253,8 +248,7 @@ def register_engine(engine): def load_engines(engine_list): - """usage: ``engine_list = settings['engines']`` - """ + """usage: ``engine_list = settings['engines']``""" engines.clear() engine_shortcuts.clear() categories.clear() diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index b9a0086bd..33e0cc393 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -25,9 +25,7 @@ page_size = 10 # search url search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}' time_range_support = True -time_range_dict = {'day': 1, - 'week': 7, - 'month': 30} +time_range_dict = {'day': 1, 'week': 7, 'month': 30} # xpaths results_xpath = '//li[@class="result"]' @@ -54,7 +52,7 @@ def response(resp): # trim results so there's not way too many at once first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1) all_results = eval_xpath_list(dom, results_xpath) - trimmed_results = all_results[first_result_index:first_result_index + page_size] + trimmed_results = all_results[first_result_index : first_result_index + page_size] # get results for result in trimmed_results: @@ -65,10 +63,7 @@ 
def response(resp): title = extract_text(eval_xpath(result, title_xpath)) content = extract_text(eval_xpath(result, content_xpath)) - results.append({'url': cleaned_url, - 'title': title, - 'content': content, - 'is_onion': True}) + results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True}) # get spelling corrections for correction in eval_xpath_list(dom, correction_xpath): diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index 746a8cd9c..da84bc79e 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -35,8 +35,8 @@ search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{q def request(query, params): params['url'] = search_url.format( - pageno = params['pageno'], - query = urlencode({'s': query}), + pageno=params['pageno'], + query=urlencode({'s': query}), ) logger.debug("query_url --> %s", params['url']) return params @@ -55,11 +55,7 @@ def response(resp): url = base_url + link.attrib.get('href') + '#downloads' title = extract_text(link) img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0) - res = { - 'url': url, - 'title': title, - 'img_src': img_src - } + res = {'url': url, 'title': title, 'img_src': img_src} results.append(res) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index 1aa8d0ade..1cfb3983f 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -97,7 +97,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -141,7 +141,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/artic.py b/searx/engines/artic.py index 104ab8839..c0ae0a5e7 100644 --- a/searx/engines/artic.py +++ b/searx/engines/artic.py @@ -27,19 +27,23 @@ nb_per_page = 20 search_api = 'https://api.artic.edu/api/v1/artworks/search?' 
image_api = 'https://www.artic.edu/iiif/2/' + def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : nb_per_page, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': nb_per_page, + } + ) params['url'] = search_api + args logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] @@ -50,14 +54,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 09ea07ea5..a1a58172d 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -20,8 +20,9 @@ about = { categories = ['science'] paging = True -base_url = 'https://export.arxiv.org/api/query?search_query=all:'\ - + '{query}&start={offset}&max_results={number_of_results}' +base_url = ( + 'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}' +) # engine dependent config number_of_results = 10 @@ -31,9 +32,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=query, - offset=offset, - number_of_results=number_of_results) + string_args = dict(query=query, offset=offset, number_of_results=number_of_results) params['url'] = base_url.format(**string_args) @@ -65,10 +64,7 @@ def response(resp): publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} results.append(res_dict) diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index 62745243f..ba951a393 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -44,9 +44,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) params['url'] = base_url + search_path diff --git a/searx/engines/base.py b/searx/engines/base.py index 463274681..5a2d66619 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -21,8 +21,10 @@ about = { categories = ['science'] -base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\ - + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' 
+base_url = ( + 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi' + + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' +) # engine dependent config paging = True @@ -47,7 +49,7 @@ shorcut_dict = { 'source:': 'dcsource:', 'subject:': 'dcsubject:', 'title:': 'dctitle:', - 'type:': 'dcdctype:' + 'type:': 'dcdctype:', } @@ -59,9 +61,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'query': query}), - offset=offset, - hits=number_of_results) + string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results) params['url'] = base_url.format(**string_args) @@ -93,7 +93,7 @@ def response(resp): if len(item.text) > 300: content += "..." -# dates returned by the BASE API are not several formats + # dates returned by the BASE API are not several formats publishedDate = None for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']: try: @@ -103,14 +103,9 @@ def response(resp): pass if publishedDate is not None: - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} else: - res_dict = {'url': url, - 'title': title, - 'content': content} + res_dict = {'url': url, 'title': title, 'content': content} results.append(res_dict) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 3917e54c1..59fc22be4 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -36,9 +36,11 @@ inital_query = 'search?{query}&search=&form=QBLH' # following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE page_query = 'search?{query}&search=&first={offset}&FORM=PERE' + def _get_offset_from_pageno(pageno): return (pageno - 1) * 10 + 1 + def request(query, params): offset = _get_offset_from_pageno(params.get('pageno', 1)) @@ -53,30 +55,23 @@ def request(query, params): if params['language'] == 'all': lang = 'EN' else: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) - query = 'language:{} {}'.format( - lang.split('-')[0].upper(), query - ) + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) - search_path = search_string.format( - query = urlencode({'q': query}), - offset = offset) + search_path = search_string.format(query=urlencode({'q': query}), offset=offset) if offset > 1: - referer = base_url + inital_query.format(query = urlencode({'q': query})) + referer = base_url + inital_query.format(query=urlencode({'q': query})) params['headers']['Referer'] = referer - logger.debug("headers.Referer --> %s", referer ) + logger.debug("headers.Referer --> %s", referer) params['url'] = base_url + search_path params['headers']['Accept-Language'] = "en-US,en;q=0.5" - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params + def response(resp): results = [] @@ -87,7 +82,7 @@ def response(resp): for result in eval_xpath(dom, '//div[@class="sa_cc"]'): # IMO //div[@class="sa_cc"] does no longer match - logger.debug('found //div[@class="sa_cc"] --> %s', result) + logger.debug('found //div[@class="sa_cc"] --> %s', result) link = eval_xpath(result, './/h3/a')[0] url = link.attrib.get('href') @@ 
-95,11 +90,7 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) # parse results again if nothing is found yet for result in eval_xpath(dom, '//li[@class="b_algo"]'): @@ -110,18 +101,14 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) try: result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()')) if "-" in result_len_container: # Remove the part "from-to" for paginated request ... - result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:] + result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :] result_len_container = re.sub('[^0-9]', '', result_len_container) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 4bee9bc7d..246d37a30 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -9,7 +9,10 @@ from json import loads from searx.utils import match_language from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.bing import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import # about about = { @@ -31,39 +34,25 @@ number_of_results = 28 # search-url base_url = 'https://www.bing.com/' -search_string = 'images/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ - '&tsc=ImageHoverTitle' +search_string = 'images/search' '?{query}' '&count={count}' '&first={first}' '&tsc=ImageHoverTitle' time_range_string = '&qft=+filterui:age-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) language = match_language(params['language'], supported_languages, language_aliases).lower() - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') - params['cookies']['_EDGE_S'] = 'mkt=' + language +\ - '&ui=' + language + '&F=1' + params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1' params['url'] = base_url + search_path if params['time_range'] in time_range_dict: @@ -92,14 +81,18 @@ def response(resp): # strip 'Unicode private use area' highlighting, they render to Tux # the Linux penguin and a standing diamond on my machine... 
title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') - results.append({'template': 'images.html', - 'url': m['purl'], - 'thumbnail_src': m['turl'], - 'img_src': m['murl'], - 'content': '', - 'title': title, - 'source': source, - 'img_format': img_format}) + results.append( + { + 'template': 'images.html', + 'url': m['purl'], + 'thumbnail_src': m['turl'], + 'img_src': m['murl'], + 'content': '', + 'title': title, + 'source': source, + 'img_format': img_format, + } + ) except: continue diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index f0bc8bead..22856541b 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -13,10 +13,7 @@ from datetime import datetime from dateutil import parser from lxml import etree from lxml.etree import XPath -from searx.utils import ( - match_language, - eval_xpath_getindex -) +from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import ( # pylint: disable=unused-import language_aliases, _fetch_supported_languages, @@ -42,11 +39,8 @@ time_range_support = True base_url = 'https://www.bing.com/' search_string = 'news/search?{query}&first={offset}&format=RSS' search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' -time_range_dict = { - 'day': '7', - 'week': '8', - 'month': '9' -} +time_range_dict = {'day': '7', 'week': '8', 'month': '9'} + def url_cleanup(url_string): """remove click""" @@ -57,6 +51,7 @@ def url_cleanup(url_string): url_string = query.get('url', None) return url_string + def image_url_cleanup(url_string): """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=...""" @@ -66,6 +61,7 @@ def image_url_cleanup(url_string): url_string = "https://www.bing.com/th?id=" + quote(query.get('id')) return url_string + def _get_url(query, language, offset, time_range): if time_range in time_range_dict: search_path = search_string_with_time.format( @@ -91,6 +87,7 @@ def _get_url(query, language, offset, time_range): ) return base_url + search_path + def request(query, params): if params['time_range'] and params['time_range'] not in time_range_dict: @@ -105,6 +102,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -127,26 +125,16 @@ def response(resp): publishedDate = datetime.now() # thumbnail - thumbnail = eval_xpath_getindex( - item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) + thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) if thumbnail is not None: thumbnail = image_url_cleanup(thumbnail) # append result if thumbnail is not None: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content, - 'img_src': thumbnail - }) + results.append( + {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail} + ) else: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content - }) + results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}) return results diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 2e1f13de2..ad61724a1 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -9,7 +9,10 @@ from urllib.parse import urlencode from searx.utils import match_language from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, 
supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.bing import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import # about about = { @@ -28,36 +31,22 @@ time_range_support = True number_of_results = 28 base_url = 'https://www.bing.com/' -search_string = 'videos/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ - '&scope=video'\ - '&FORM=QBLH' +search_string = 'videos/search' '?{query}' '&count={count}' '&first={first}' '&scope=video' '&FORM=QBLH' time_range_string = '&qft=+filterui:videoage-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) # safesearch cookie - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') # language cookie language = match_language(params['language'], supported_languages, language_aliases).lower() @@ -89,11 +78,15 @@ def response(resp): info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() content = '{0} - {1}'.format(metadata['du'], info) thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) - results.append({'url': metadata['murl'], - 'thumbnail': thumbnail, - 'title': metadata.get('vt', ''), - 'content': content, - 'template': 'videos.html'}) + results.append( + { + 'url': metadata['murl'], + 'thumbnail': thumbnail, + 'title': metadata.get('vt', ''), + 'content': content, + 'template': 'videos.html', + } + ) except: continue diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index cda9e9355..c5dd92105 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -11,10 +11,7 @@ from searx.utils import extract_text, get_torrent_size about = { "website": 'https://btdig.com', "wikidata_id": 'Q4836698', - "official_api_documentation": { - 'url': 'https://btdig.com/contacts', - 'comment': 'on demand' - }, + "official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'}, "use_official_api": False, "require_api_key": False, "results": 'HTML', @@ -31,8 +28,7 @@ search_url = url + '/search?q={search_term}&p={pageno}' # do search-request def request(query, params): - params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno'] - 1) + params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'] - 1) return params @@ -77,13 +73,17 @@ def response(resp): magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href'] # append result - results.append({'url': href, - 'title': title, - 'content': content, - 'filesize': filesize, - 'files': files, - 'magnetlink': magnetlink, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'filesize': filesize, + 'files': files, + 'magnetlink': magnetlink, + 'template': 'torrent.html', + 
} + ) # return results sorted by seeder return results diff --git a/searx/engines/ccengine.py b/searx/engines/ccengine.py index 6f3a5adb7..93ac30c86 100644 --- a/searx/engines/ccengine.py +++ b/searx/engines/ccengine.py @@ -29,10 +29,7 @@ search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}' def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - nb_per_page=nb_per_page, - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), nb_per_page=nb_per_page, page=params['pageno']) params['url'] = base_url + search_path @@ -45,9 +42,13 @@ def response(resp): json_data = loads(resp.text) for result in json_data['results']: - results.append({'url': result['foreign_landing_url'], - 'title': result['title'], - 'img_src': result['url'], - 'template': 'images.html'}) + results.append( + { + 'url': result['foreign_landing_url'], + 'title': result['title'], + 'img_src': result['url'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/command.py b/searx/engines/command.py index aca379c67..abd29e2a5 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -138,7 +138,7 @@ def __check_query_params(params): def check_parsing_options(engine_settings): - """ Checks if delimiter based parsing or regex parsing is configured correctly """ + """Checks if delimiter based parsing or regex parsing is configured correctly""" if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings: raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex') @@ -151,7 +151,7 @@ def check_parsing_options(engine_settings): def __parse_single_result(raw_result): - """ Parses command line output based on configuration """ + """Parses command line output based on configuration""" result = {} @@ -167,6 +167,6 @@ def __parse_single_result(raw_result): found = regex.search(raw_result) if not found: return {} - result[result_key] = raw_result[found.start():found.end()] + result[result_key] = raw_result[found.start() : found.end()] return result diff --git a/searx/engines/core.py b/searx/engines/core.py index e83c8bbe9..1fcb68f1f 100644 --- a/searx/engines/core.py +++ b/searx/engines/core.py @@ -28,22 +28,24 @@ api_key = 'unset' base_url = 'https://core.ac.uk:443/api-v2/search/' search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing CORE API key') search_path = search_string.format( - query = urlencode({'q': query}), - nb_per_page = nb_per_page, - page = params['pageno'], - apikey = api_key, + query=urlencode({'q': query}), + nb_per_page=nb_per_page, + page=params['pageno'], + apikey=api_key, ) params['url'] = base_url + search_path logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] json_data = loads(resp.text) @@ -52,7 +54,7 @@ def response(resp): source = result['_source'] time = source['publishedDate'] or source['depositedDate'] - if time : + if time: date = datetime.fromtimestamp(time / 1000) else: date = None @@ -66,12 +68,14 @@ def response(resp): metadata.append(source['doi']) metadata = ' / '.join(metadata) - results.append({ - 'url': source['urls'][0].replace('http://', 'https://', 1), - 'title': source['title'], - 'content': source['description'], - 'publishedDate': date, - 'metadata' : metadata, - }) + results.append( + { + 'url': source['urls'][0].replace('http://', 'https://', 
1), + 'title': source['title'], + 'content': source['description'], + 'publishedDate': date, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index d4c3b5f81..969688126 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -30,7 +30,7 @@ def request(query, params): def response(resp): """remove first and last lines to get only json""" - json_resp = resp.text[resp.text.find('\n') + 1:resp.text.rfind('\n') - 2] + json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] results = [] try: conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount']) @@ -47,7 +47,8 @@ def response(resp): ) url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format( - resp.search_params['from'].upper(), resp.search_params['to']) + resp.search_params['from'].upper(), resp.search_params['to'] + ) results.append({'answer': answer, 'url': url}) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 92d368c11..5607691a4 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -25,8 +25,10 @@ paging = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa -embedded_url = '<iframe frameborder="0" width="540" height="304" ' +\ - 'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' +embedded_url = ( + '<iframe frameborder="0" width="540" height="304" ' + + 'data-src="https://www.dailymotion.com/embed/video/{videoid}" allowfullscreen></iframe>' +) supported_languages_url = 'https://api.dailymotion.com/languages' @@ -39,8 +41,8 @@ def request(query, params): locale = match_language(params['language'], supported_languages) params['url'] = search_url.format( - query=urlencode({'search': query, 'localization': locale}), - pageno=params['pageno']) + query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno'] + ) return params @@ -67,13 +69,17 @@ def response(resp): # http to https thumbnail = thumbnail.replace("http://", "https://") - results.append({'template': 'videos.html', - 'url': url, - 'title': title, - 'content': content, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'template': 'videos.html', + 'url': url, + 'title': title, + 'content': content, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 946bd3ebe..220ac599d 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -24,9 +24,11 @@ paging = True url = 'https://api.deezer.com/' search_url = url + 'search?{query}&index={offset}' -embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\ - 'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' +\ - 'width="540" height="80"></iframe>' +embedded_url = ( + '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' + + 'data-src="https://www.deezer.com/plugins/player?type=tracks&id={audioid}" ' + + 'width="540" height="80"></iframe>' +) # do search-request @@ -53,18 +55,12 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = '{} - {} - {}'.format( - 
result['artist']['name'], - result['album']['title'], - result['title']) + content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index a4a632180..aeb74f443 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -31,6 +31,7 @@ about = { # if there is a need for globals, use a leading underline _my_offline_engine = None + def init(engine_settings=None): """Initialization of the (offline) engine. The origin of this demo engine is a simple json string which is loaded in this example while the engine is @@ -44,11 +45,10 @@ def init(engine_settings=None): ', {"value":"first item"}' ', {"value":"second item"}' ', {"value":"third item"}' - ']' - - % engine_settings.get('name') + ']' % engine_settings.get('name') ) + def search(query, request_params): """Query (offline) engine and return results. Assemble the list of results from your local engine. In this demo engine we ignore the 'query' term, usual @@ -62,11 +62,11 @@ def search(query, request_params): for row in result_list: entry = { - 'query' : query, - 'language' : request_params['language'], - 'value' : row.get("value"), + 'query': query, + 'language': request_params['language'], + 'value': row.get("value"), # choose a result template or comment out to use the *default* - 'template' : 'key-value.html', + 'template': 'key-value.html', } ret_val.append(entry) diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py index a0f736e42..e53b3c15e 100644 --- a/searx/engines/demo_online.py +++ b/searx/engines/demo_online.py @@ -43,6 +43,7 @@ about = { # if there is a need for globals, use a leading underline _my_online_engine = None + def init(engine_settings): """Initialization of the (online) engine. If no initialization is needed, drop this init function. @@ -51,20 +52,24 @@ def init(engine_settings): global _my_online_engine # pylint: disable=global-statement _my_online_engine = engine_settings.get('name') + def request(query, params): """Build up the ``params`` for the online request. In this example we build a URL to fetch images from `artic.edu <https://artic.edu>`__ """ - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : page_size, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': page_size, + } + ) params['url'] = search_api + args return params + def response(resp): """Parse out the result items from the response. 
In this example we parse the response from `api.artic.edu <https://artic.edu>`__ and filter out all @@ -79,14 +84,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index b13d54dd5..e44ac28e5 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -32,13 +32,14 @@ time_range_dict = { # search-url base_url = 'https://www.deviantart.com' + def request(query, params): # https://www.deviantart.com/search/deviations?page=5&q=foo - query = { - 'page' : params['pageno'], - 'q' : query, + query = { + 'page': params['pageno'], + 'q': query, } if params['time_range'] in time_range_dict: query['order'] = time_range_dict[params['time_range']] @@ -47,6 +48,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -67,11 +69,13 @@ def response(resp): continue img_tag = img_tag[0] - results.append({ - 'template': 'images.html', - 'url': a_tag.attrib.get('href'), - 'img_src': img_tag.attrib.get('src'), - 'title': img_tag.attrib.get('alt'), - }) + results.append( + { + 'template': 'images.html', + 'url': a_tag.attrib.get('href'), + 'img_src': img_tag.attrib.get('src'), + 'title': img_tag.attrib.get('alt'), + } + ) return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 4a92a22c3..126e75374 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -27,9 +27,7 @@ https_support = True def request(query, params): - params['url'] = url.format(from_lang=params['from_lang'][2], - to_lang=params['to_lang'][2], - query=params['query']) + params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query']) return params @@ -51,10 +49,12 @@ def response(resp): if t.strip(): to_results.append(to_result.text_content()) - results.append({ - 'url': urljoin(str(resp.url), '?%d' % k), - 'title': from_result.text_content(), - 'content': '; '.join(to_results) - }) + results.append( + { + 'url': urljoin(str(resp.url), '?%d' % k), + 'title': from_result.text_content(), + 'content': '; '.join(to_results), + } + ) return results diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index 109662a49..2914e9228 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -48,13 +48,17 @@ def response(resp): filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] - results.append({'url': url, - 'title': title, - 'content': content, - 'filesize': filesize, - 'magnetlink': magnetlink, - 'seed': 'N/A', - 'leech': 'N/A', - 'template': 'torrent.html'}) + results.append( + { + 'url': url, + 'title': title, + 
'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/docker_hub.py b/searx/engines/docker_hub.py index e69f677b3..1e492b196 100644 --- a/searx/engines/docker_hub.py +++ b/searx/engines/docker_hub.py @@ -9,13 +9,13 @@ from urllib.parse import urlencode from dateutil import parser about = { - "website": 'https://hub.docker.com', - "wikidata_id": 'Q100769064', - "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', - "use_official_api": True, - "require_api_key": False, - "results": 'JSON', - } + "website": 'https://hub.docker.com', + "wikidata_id": 'Q100769064', + "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} categories = ['it'] # optional paging = True @@ -23,6 +23,7 @@ paging = True base_url = "https://hub.docker.com/" search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25" + def request(query, params): params['url'] = search_url.format(query=urlencode(dict(q=query, page=params["pageno"]))) @@ -30,6 +31,7 @@ def request(query, params): return params + def response(resp): '''post-response callback resp: requests response object @@ -53,12 +55,8 @@ def response(resp): result["url"] = base_url + "r/" + item.get('slug', "") result["title"] = item.get("name") result["content"] = item.get("short_description") - result["publishedDate"] = parser.parse( - item.get("updated_at") or item.get("created_at") - ) - result["thumbnail"] = ( - item["logo_url"].get("large") or item["logo_url"].get("small") - ) + result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at")) + result["thumbnail"] = item["logo_url"].get("large") or item["logo_url"].get("small") results.append(result) return results diff --git a/searx/engines/doku.py b/searx/engines/doku.py index cf38b3b9a..e81131cce 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -25,8 +25,7 @@ number_of_results = 5 # search-url # Doku is OpenSearch compatible base_url = 'http://localhost:8090' -search_url = '/?do=search'\ - '&{query}' +search_url = '/?do=search' '&{query}' # TODO '&startRecord={offset}'\ # TODO '&maximumRecords={limit}'\ @@ -34,8 +33,7 @@ search_url = '/?do=search'\ # do search-request def request(query, params): - params['url'] = base_url +\ - search_url.format(query=urlencode({'id': query})) + params['url'] = base_url + search_url.format(query=urlencode({'id': query})) return params @@ -60,9 +58,7 @@ def response(resp): title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title')) # append result - results.append({'title': title, - 'content': "", - 'url': base_url + res_url}) + results.append({'title': title, 'content': "", 'url': base_url + res_url}) # Search results for r in eval_xpath(doc, '//dl[@class="search_results"]/*'): @@ -74,9 +70,7 @@ def response(resp): content = extract_text(eval_xpath(r, '.')) # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url}) + results.append({'title': title, 'content': content, 'url': base_url + res_url}) except: continue diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d283af81d..0d2a524df 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -39,15 +39,10 @@ language_aliases = { 'ko': 'kr-KR', 'sl-SI': 'sl-SL', 'zh-TW': 'tzh-TW', - 'zh-HK': 'tzh-HK' + 'zh-HK': 
'tzh-HK', } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # search-url url = 'https://lite.duckduckgo.com/lite' @@ -118,6 +113,7 @@ def request(query, params): logger.debug("param cookies: %s", params['cookies']) return params + # get response from search-request def response(resp): @@ -163,21 +159,24 @@ def response(resp): if td_content is None: continue - results.append({ - 'title': a_tag.text_content(), - 'content': extract_text(td_content), - 'url': a_tag.get('href'), - }) + results.append( + { + 'title': a_tag.text_content(), + 'content': extract_text(td_content), + 'url': a_tag.get('href'), + } + ) return results + # get supported languages from their site def _fetch_supported_languages(resp): # response is a js file with regions as an embedded object response_page = resp.text - response_page = response_page[response_page.find('regions:{') + 8:] - response_page = response_page[:response_page.find('}') + 1] + response_page = response_page[response_page.find('regions:{') + 8 :] + response_page = response_page[: response_page.find('}') + 1] regions_json = loads(response_page) supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 3ef043964..d4e813c2b 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,7 +10,10 @@ from lxml import html from searx.data import WIKIDATA_UNITS from searx.engines.duckduckgo import language_aliases -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom @@ -24,19 +27,15 @@ about = { "results": 'JSON', } -URL = 'https://api.duckduckgo.com/'\ - + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1' -WIKIDATA_PREFIX = [ - 'http://www.wikidata.org/entity/', - 'https://www.wikidata.org/entity/' -] +WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/'] replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def is_broken_text(text): - """ duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>" + """duckduckgo may return something like "<a href="xxxx">http://somewhere Related website<a/>" The href URL is broken, the "Related website" may contains some HTML. 
@@ -61,11 +60,7 @@ def result_to_text(text, htmlResult): def request(query, params): params['url'] = URL.format(query=urlencode({'q': query})) - language = match_language( - params['language'], - supported_languages, - language_aliases - ) + language = match_language(params['language'], supported_languages, language_aliases) language = language.split('-')[0] params['headers']['Accept-Language'] = language return params @@ -127,23 +122,14 @@ def response(resp): firstURL = ddg_result.get('FirstURL') text = ddg_result.get('Text') if not is_broken_text(text): - suggestion = result_to_text( - text, - ddg_result.get('Result') - ) + suggestion = result_to_text(text, ddg_result.get('Result')) if suggestion != heading and suggestion is not None: results.append({'suggestion': suggestion}) elif 'Topics' in ddg_result: suggestions = [] - relatedTopics.append({ - 'name': ddg_result.get('Name', ''), - 'suggestions': suggestions - }) + relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions}) for topic_result in ddg_result.get('Topics', []): - suggestion = result_to_text( - topic_result.get('Text'), - topic_result.get('Result') - ) + suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result')) if suggestion != heading and suggestion is not None: suggestions.append(suggestion) @@ -152,25 +138,15 @@ def response(resp): if abstractURL != '': # add as result ? problem always in english infobox_id = abstractURL - urls.append({ - 'title': search_res.get('AbstractSource'), - 'url': abstractURL, - 'official': True - }) - results.append({ - 'url': abstractURL, - 'title': heading - }) + urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True}) + results.append({'url': abstractURL, 'title': heading}) # definition definitionURL = search_res.get('DefinitionURL', '') if definitionURL != '': # add as result ? as answer ? 
problem always in english infobox_id = definitionURL - urls.append({ - 'title': search_res.get('DefinitionSource'), - 'url': definitionURL - }) + urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) # to merge with wikidata's infobox if infobox_id: @@ -198,10 +174,7 @@ def response(resp): # * netflix_id external_url = get_external_url(data_type, data_value) if external_url is not None: - urls.append({ - 'title': data_label, - 'url': external_url - }) + urls.append({'title': data_label, 'url': external_url}) elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']: # ignore instance: Wikidata value from "Instance Of" (Qxxxx) # ignore wiki_maps_trigger: reference to a javascript @@ -211,11 +184,7 @@ def response(resp): # There is already an URL for the website pass elif data_type == 'area': - attributes.append({ - 'label': data_label, - 'value': area_to_str(data_value), - 'entity': 'P2046' - }) + attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'}) osm_zoom = area_to_osm_zoom(data_value.get('amount')) elif data_type == 'coordinates': if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2': @@ -224,16 +193,9 @@ def response(resp): coordinates = info else: # coordinate NOT on Earth - attributes.append({ - 'label': data_label, - 'value': data_value, - 'entity': 'P625' - }) + attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'}) elif data_type == 'string': - attributes.append({ - 'label': data_label, - 'value': data_value - }) + attributes.append({'label': data_label, 'value': data_value}) if coordinates: data_label = coordinates.get('label') @@ -241,31 +203,24 @@ def response(resp): latitude = data_value.get('latitude') longitude = data_value.get('longitude') url = get_earth_coordinates_url(latitude, longitude, osm_zoom) - urls.append({ - 'title': 'OpenStreetMap', - 'url': url, - 'entity': 'P625' - }) + urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'}) if len(heading) > 0: # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme - if image is None and len(attributes) == 0 and len(urls) == 1 and\ - len(relatedTopics) == 0 and len(content) == 0: - results.append({ - 'url': urls[0]['url'], - 'title': heading, - 'content': content - }) + if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0: + results.append({'url': urls[0]['url'], 'title': heading, 'content': content}) else: - results.append({ - 'infobox': heading, - 'id': infobox_id, - 'content': content, - 'img_src': image, - 'attributes': attributes, - 'urls': urls, - 'relatedTopics': relatedTopics - }) + results.append( + { + 'infobox': heading, + 'id': infobox_id, + 'content': content, + 'img_src': image, + 'attributes': attributes, + 'urls': urls, + 'relatedTopics': relatedTopics, + } + ) return results @@ -273,7 +228,7 @@ def response(resp): def unit_to_str(unit): for prefix in WIKIDATA_PREFIX: if unit.startswith(prefix): - wikidata_entity = unit[len(prefix):] + wikidata_entity = unit[len(prefix) :] return WIKIDATA_UNITS.get(wikidata_entity, unit) return unit diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 0daaf41e9..21e24d2c9 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -7,7 +7,10 @@ from json import loads from urllib.parse import urlencode from searx.exceptions import SearxEngineAPIException from searx.engines.duckduckgo 
import get_region_code -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import from searx.network import get # about @@ -41,8 +44,8 @@ def get_vqd(query, headers): content = res.text if content.find('vqd=\'') == -1: raise SearxEngineAPIException('Request failed') - vqd = content[content.find('vqd=\'') + 5:] - vqd = vqd[:vqd.find('\'')] + vqd = content[content.find('vqd=\'') + 5 :] + vqd = vqd[: vqd.find('\'')] return vqd @@ -61,10 +64,10 @@ def request(query, params): region_code = get_region_code(params['language'], lang_list=supported_languages) if region_code: params['url'] = images_url.format( - query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd) + query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd + ) else: - params['url'] = images_url.format( - query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) return params @@ -84,11 +87,15 @@ def response(resp): image = result['image'] # append result - results.append({'template': 'images.html', - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail, - 'img_src': image, - 'url': url}) + results.append( + { + 'template': 'images.html', + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail, + 'img_src': image, + 'url': url, + } + ) return results diff --git a/searx/engines/duden.py b/searx/engines/duden.py index bc4211c67..600b61f3c 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -38,7 +38,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - offset = (params['pageno'] - 1) + offset = params['pageno'] - 1 if offset == 0: search_url_fmt = base_url + 'suchen/dudenonline/{query}' params['url'] = search_url_fmt.format(query=quote(query)) @@ -58,9 +58,9 @@ def response(resp): dom = html.fromstring(resp.text) - number_of_results_element =\ - eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', - 0, default=None) + number_of_results_element = eval_xpath_getindex( + dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None + ) if number_of_results_element is not None: number_of_results_string = re.sub('[^0-9]', '', number_of_results_element) results.append({'number_of_results': int(number_of_results_string)}) @@ -71,8 +71,6 @@ def response(resp): title = eval_xpath(result, 'string(.//h2/a)').strip() content = extract_text(eval_xpath(result, './/p')) # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py index cf2f75312..632eeb2b3 100644 --- a/searx/engines/dummy-offline.py +++ b/searx/engines/dummy-offline.py @@ -15,6 +15,8 @@ about = { def search(query, request_params): - return [{ - 'result': 'this is what you get', - }] + return [ + { + 'result': 'this is what you get', + } + ] diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py index 45c633b42..b7aefcb44 100644 --- a/searx/engines/ebay.py +++ b/searx/engines/ebay.py @@ -58,16 +58,17 @@ def response(resp): if title == "": 
continue - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'price': price, - 'shipping': shipping, - 'source_country': source_country, - 'thumbnail': thumbnail, - 'template': 'products.html', - - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'price': price, + 'shipping': shipping, + 'source_country': source_country, + 'thumbnail': thumbnail, + 'template': 'products.html', + } + ) return results diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index db84a5c13..f6e207b4d 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -119,9 +119,7 @@ def response(resp): r['template'] = 'key-value.html' if show_metadata: - r['metadata'] = {'index': result['_index'], - 'id': result['_id'], - 'score': result['_score']} + r['metadata'] = {'index': result['_index'], 'id': result['_id'], 'score': result['_score']} results.append(r) @@ -133,12 +131,10 @@ _available_query_types = { # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html 'match': _match_query, 'simple_query_string': _simple_query_string_query, - # Term-level queries # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html 'term': _term_query, 'terms': _terms_query, - # Query JSON defined by the instance administrator. 'custom': _custom_query, } diff --git a/searx/engines/etools.py b/searx/engines/etools.py index bf4f4ea1f..c66ceeb4b 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -22,10 +22,7 @@ paging = False safesearch = True base_url = 'https://www.etools.ch' -search_path = '/searchAdvancedSubmit.do'\ - '?query={search_term}'\ - '&pageResults=20'\ - '&safeSearch={safesearch}' +search_path = '/searchAdvancedSubmit.do' '?query={search_term}' '&pageResults=20' '&safeSearch={safesearch}' def request(query, params): @@ -49,8 +46,6 @@ def response(resp): title = extract_text(eval_xpath(result, './a//text()')) content = extract_text(eval_xpath(result, './/div[@class="text"]//text()')) - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 8fff2e384..c381b25d4 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -42,13 +42,13 @@ def response(resp): for app in dom.xpath('//a[@class="package-header"]'): app_url = app.xpath('./@href')[0] app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()')) - app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \ - + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + app_content = ( + extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() + + ' - ' + + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + ) app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0] - results.append({'url': app_url, - 'title': app_title, - 'content': app_content, - 'img_src': app_img_src}) + results.append({'url': app_url, 'title': app_title, 'content': app_content, 'img_src': app_img_src}) return results diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index b0ddf6224..b7cd76808 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -25,10 +25,12 @@ paging = True api_key = None -url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ - 
'&api_key={api_key}&{text}&sort=relevance' +\ - '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\ - '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +url = ( + 'https://api.flickr.com/services/rest/?method=flickr.photos.search' + + '&api_key={api_key}&{text}&sort=relevance' + + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' + + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +) photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True @@ -39,10 +41,9 @@ def build_flickr_url(user_id, photo_id): def request(query, params): - params['url'] = url.format(text=urlencode({'text': query}), - api_key=api_key, - nb_per_page=nb_per_page, - page=params['pageno']) + params['url'] = url.format( + text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno'] + ) return params @@ -69,7 +70,7 @@ def response(resp): else: continue -# For a bigger thumbnail, keep only the url_z, not the url_n + # For a bigger thumbnail, keep only the url_z, not the url_n if 'url_n' in photo: thumbnail_src = photo['url_n'] elif 'url_z' in photo: @@ -80,13 +81,17 @@ def response(resp): url = build_flickr_url(photo['owner'], photo['id']) # append result - results.append({'url': url, - 'title': photo['title'], - 'img_src': img_src, - 'thumbnail_src': thumbnail_src, - 'content': photo['description']['_content'], - 'author': photo['ownername'], - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': photo['title'], + 'img_src': img_src, + 'thumbnail_src': thumbnail_src, + 'content': photo['description']['_content'], + 'author': photo['ownername'], + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 1d670ee50..4ff59fc52 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -30,10 +30,12 @@ image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') paging = True time_range_support = True -time_range_dict = {'day': 60 * 60 * 24, - 'week': 60 * 60 * 24 * 7, - 'month': 60 * 60 * 24 * 7 * 4, - 'year': 60 * 60 * 24 * 7 * 52} +time_range_dict = { + 'day': 60 * 60 * 24, + 'week': 60 * 60 * 24 * 7, + 'month': 60 * 60 * 24 * 7 * 4, + 'year': 60 * 60 * 24 * 7 * 52, +} def build_flickr_url(user_id, photo_id): @@ -47,8 +49,9 @@ def _get_time_range_url(time_range): def request(query, params): - params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno']) - + _get_time_range_url(params['time_range'])) + params['url'] = search_url.format(query=urlencode({'text': query}), page=params['pageno']) + _get_time_range_url( + params['time_range'] + ) return params @@ -83,10 +86,9 @@ def response(resp): for image_size in image_sizes: if image_size in photo['sizes']: img_src = photo['sizes'][image_size]['url'] - img_format = 'jpg ' \ - + str(photo['sizes'][image_size]['width']) \ - + 'x' \ - + str(photo['sizes'][image_size]['height']) + img_format = ( + 'jpg ' + str(photo['sizes'][image_size]['width']) + 'x' + str(photo['sizes'][image_size]['height']) + ) break if not img_src: @@ -113,7 +115,7 @@ def response(resp): 'thumbnail_src': thumbnail_src, 'source': source, 'img_format': img_format, - 'template': 'images.html' + 'template': 'images.html', } result['author'] = author.encode(errors='ignore').decode() result['source'] = source.encode(errors='ignore').decode() diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index 
42c08cf95..b2c9d9077 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -35,9 +35,8 @@ content_xpath = './/div[@class="content"]//p' # do search-request def request(query, params): - offset = (params['pageno'] - 1) - params['url'] = search_url.format(query=urlencode({'keys': query}), - offset=offset) + offset = params['pageno'] - 1 + params['url'] = search_url.format(query=urlencode({'keys': query}), offset=offset) return params @@ -63,10 +62,7 @@ def response(resp): content = escape(extract_text(result.xpath(content_xpath))) # append result - results.append({'url': href, - 'title': title, - 'img_src': thumbnail, - 'content': content}) + results.append({'url': href, 'title': title, 'img_src': thumbnail, 'content': content}) # return results return results diff --git a/searx/engines/freesound.py b/searx/engines/freesound.py index d2564946c..121a6a5b0 100644 --- a/searx/engines/freesound.py +++ b/searx/engines/freesound.py @@ -26,8 +26,7 @@ paging = True # search url url = "https://freesound.org/apiv2/" search_url = ( - url - + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" + url + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" ) embedded_url = '<audio controls><source src="{uri}" type="audio/{ftype}"></audio>' diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index f43bb6e20..95a1366de 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,10 +10,7 @@ from urllib.parse import urlencode about = { "website": 'https://frinkiac.com', "wikidata_id": 'Q24882614', - "official_api_documentation": { - 'url': None, - 'comment': 'see https://github.com/MitchellAW/CompuGlobal' - }, + "official_api_documentation": {'url': None, 'comment': 'see https://github.com/MitchellAW/CompuGlobal'}, "use_official_api": False, "require_api_key": False, "results": 'JSON', @@ -40,12 +37,15 @@ def response(resp): episode = result['Episode'] timestamp = result['Timestamp'] - results.append({'template': 'images.html', - 'url': RESULT_URL.format(base=BASE, - query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), - 'title': episode, - 'content': '', - 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), - 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)}) + results.append( + { + 'template': 'images.html', + 'url': RESULT_URL.format(base=BASE, query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), + 'title': episode, + 'content': '', + 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), + 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp), + } + ) return results diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 325e132a6..5b9edafe0 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -37,15 +37,12 @@ def locale_to_lang_code(locale): # wikis for some languages were moved off from the main site, we need to make # requests to correct URLs to be able to get results in those languages lang_urls = { - 'en': { - 'base': 'https://wiki.gentoo.org', - 'search': '/index.php?title=Special:Search&offset={offset}&{query}' - }, + 'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'}, 'others': { 'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}\ - 
&profile=translation&languagefilter={language}' - } + &profile=translation&languagefilter={language}', + }, } @@ -78,7 +75,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -101,8 +98,7 @@ def request(query, params): urls = get_lang_urls(language) search_url = urls['base'] + urls['search'] - params['url'] = search_url.format(query=query, offset=offset, - language=language) + params['url'] = search_url.format(query=query, offset=offset, language=language) return params @@ -123,7 +119,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 0f685abc5..c657dca30 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -55,12 +55,12 @@ def fetch_extra_param(query_args, headers): extra_param_path = search_path + urlencode(query_args) text = get(base_url + extra_param_path, headers=headers).text - re_var= None + re_var = None for line in text.splitlines(): if re_var is None and extra_param_path in line: var = line.split("=")[0].split()[1] # e.g. var --> 'uxrl' re_var = re.compile(var + "\\s*=\\s*" + var + "\\s*\\+\\s*'" + "(.*)" + "'(.*)") - extra_param = line.split("'")[1][len(extra_param_path):] + extra_param = line.split("'")[1][len(extra_param_path) :] continue if re_var is not None and re_var.search(line): extra_param += re_var.search(line).group(1) @@ -69,12 +69,7 @@ def fetch_extra_param(query_args, headers): # do search-request def request(query, params): # pylint: disable=unused-argument - query_args = dict( - c = 'main' - , q = query - , dr = 1 - , showgoodimages = 0 - ) + query_args = dict(c='main', q=query, dr=1, showgoodimages=0) if params['language'] and params['language'] != 'all': query_args['qlangcountry'] = params['language'] @@ -93,6 +88,7 @@ def request(query, params): # pylint: disable=unused-argument return params + # get response from search-request def response(resp): results = [] @@ -125,10 +121,6 @@ def response(resp): if len(subtitle) > 3 and subtitle != title: title += " - " + subtitle - results.append(dict( - url = url - , title = title - , content = content - )) + results.append(dict(url=url, title=title, content=content)) return results diff --git a/searx/engines/github.py b/searx/engines/github.py index b68caa350..1d12d296a 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -55,9 +55,7 @@ def response(resp): content = '' # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results diff --git a/searx/engines/google.py b/searx/engines/google.py index 578dec60c..685697d29 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -50,72 +50,63 @@ supported_languages_url = 'https://www.google.com/preferences?#languages' # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece + 'BG': 'google.bg', # Bulgaria + 'CZ': 'google.cz', # Czech Republic + 'DE': 'google.de', # Germany + 'DK': 'google.dk', # 
Denmark + 'AT': 'google.at', # Austria + 'CH': 'google.ch', # Switzerland + 'GR': 'google.gr', # Greece 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India + 'CA': 'google.ca', # Canada + 'GB': 'google.co.uk', # United Kingdom + 'ID': 'google.co.id', # Indonesia + 'IE': 'google.ie', # Ireland + 'IN': 'google.co.in', # India 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand + 'NZ': 'google.co.nz', # New Zealand 'PH': 'google.com.ph', # Philippines 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa + 'US': 'google.com', # United States (google.us) redirects to .com + 'ZA': 'google.co.za', # South Africa 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain + 'CL': 'google.cl', # Chile + 'ES': 'google.es', # Spain 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland + 'EE': 'google.ee', # Estonia + 'FI': 'google.fi', # Finland + 'BE': 'google.be', # Belgium + 'FR': 'google.fr', # France + 'IL': 'google.co.il', # Israel + 'HR': 'google.hr', # Croatia + 'HU': 'google.hu', # Hungary + 'IT': 'google.it', # Italy + 'JP': 'google.co.jp', # Japan + 'KR': 'google.co.kr', # South Korea + 'LT': 'google.lt', # Lithuania + 'LV': 'google.lv', # Latvia + 'NO': 'google.no', # Norway + 'NL': 'google.nl', # Netherlands + 'PL': 'google.pl', # Poland 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand + 'PT': 'google.pt', # Portugal + 'RO': 'google.ro', # Romania + 'RU': 'google.ru', # Russia + 'SK': 'google.sk', # Slovakia + 'SI': 'google.si', # Slovenia + 'SE': 'google.se', # Sweden + 'TH': 'google.co.th', # Thailand 'TR': 'google.com.tr', # Turkey 'UA': 'google.com.ua', # Ukraine 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw' # Taiwan + 'TW': 'google.com.tw', # Taiwan } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # Filter results. 0: None, 1: Moderate, 2: Strict -filter_mapping = { - 0: 'off', - 1: 'medium', - 2: 'high' -} +filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ @@ -140,6 +131,7 @@ content_xpath = './/div[@class="IsZvec"]' # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' + def get_lang_info(params, lang_list, custom_aliases, supported_any_language): """Composing various language properties for the google engines. 
@@ -184,11 +176,11 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): request's headers) """ ret_val = { - 'language' : None, - 'country' : None, - 'subdomain' : None, - 'params' : {}, - 'headers' : {}, + 'language': None, + 'country': None, + 'subdomain': None, + 'params': {}, + 'headers': {}, } # language ... @@ -213,7 +205,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # subdomain ... - ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') + ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') # params & headers @@ -250,15 +242,18 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 - ret_val['headers']['Accept-Language'] = ','.join([ - lang_country, - language + ';q=0.8,', - 'en;q=0.6', - '*;q=0.5', - ]) + ret_val['headers']['Accept-Language'] = ','.join( + [ + lang_country, + language + ';q=0.8,', + 'en;q=0.6', + '*;q=0.5', + ] + ) return ret_val + def detect_google_sorry(resp): if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'): raise SearxEngineCaptchaException() @@ -269,9 +264,7 @@ def request(query, params): offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info( - params, supported_languages, language_aliases, True - ) + lang_info = get_lang_info(params, supported_languages, language_aliases, True) additional_parameters = {} if use_mobile_ui: @@ -281,15 +274,23 @@ def request(query, params): } # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, - 'filter': '0', - **additional_parameters, - }) + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" 
+ + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'start': offset, + 'filter': '0', + **additional_parameters, + } + ) + ) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -301,9 +302,7 @@ def request(query, params): if use_mobile_ui: params['headers']['Accept'] = '*/*' else: - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -325,7 +324,7 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results + # results --> number_of_results if not use_mobile_ui: try: _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) @@ -355,11 +354,7 @@ def response(resp): if url is None: continue content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) # from lxml import etree diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 61d291e3f..203df404a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -30,10 +30,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -53,21 +51,16 @@ use_locale_domain = True time_range_support = True safesearch = True -filter_mapping = { - 0: 'images', - 1: 'active', - 2: 'active' -} +filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags. - """ + """Scrap out thumbnail data from <script> tags.""" ret_val = {} for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'): _script = script.text # _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....'); - _thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",", 1) + _thumb_no, _img_data = _script[len("_setImgSrc(") : -2].split(",", 1) _thumb_no = _thumb_no.replace("'", "") _img_data = _img_data.replace("'", "") _img_data = _img_data.replace(r"\/", r"/") @@ -76,8 +69,7 @@ def scrap_out_thumbs(dom): def scrap_img_by_id(script, data_id): - """Get full image URL by data-id in parent element - """ + """Get full image URL by data-id in parent element""" img_url = '' _script = script.split('\n') for i, line in enumerate(_script): @@ -91,20 +83,25 @@ def scrap_img_by_id(script, data_id): def request(query, params): """Google-Video search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" 
+ + urlencode( + { + 'q': query, + 'tbm': "isch", + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'num': 30, + } + ) ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) - - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - 'tbm': "isch", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'num': 30, - }) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -113,9 +110,7 @@ def request(query, params): params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -128,8 +123,7 @@ def response(resp): # convert the text to dom dom = html.fromstring(resp.text) img_bas64_map = scrap_out_thumbs(dom) - img_src_script = eval_xpath_getindex( - dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text + img_src_script = eval_xpath_getindex(dom, '//script[contains(., "AF_initDataCallback({key: ")]', 1).text # parse results # @@ -189,15 +183,17 @@ def response(resp): if not src_url: src_url = thumbnail_src - results.append({ - 'url': url, - 'title': img_alt, - 'content': pub_descr, - 'source': pub_source, - 'img_src': src_url, - # 'img_format': img_format, - 'thumbnail_src': thumbnail_src, - 'template': 'images.html' - }) + results.append( + { + 'url': url, + 'title': img_alt, + 'content': pub_descr, + 'source': pub_source, + 'img_src': src_url, + # 'img_format': img_format, + 'thumbnail_src': thumbnail_src, + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 87ac9a19d..162e4348e 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -32,6 +32,7 @@ from searx.engines.google import ( supported_languages_url, _fetch_supported_languages, ) + # pylint: enable=unused-import from searx.engines.google import ( @@ -71,14 +72,12 @@ time_range_support = True # safesearch : results are identitical for safesearch=0 and safesearch=2 safesearch = False + def request(query, params): """Google-News search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False - ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) # google news has only one domain lang_info['subdomain'] = 'news.google.com' @@ -94,19 +93,26 @@ def request(query, params): if params['time_range']: query += ' ' + time_range_dict[params['time_range']] - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'gl': lang_info['country'], - }) + ('&ceid=%s' % ceid) # ceid includes a ':' character which must not be urlencoded + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" 
+ + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'gl': lang_info['country'], + } + ) + + ('&ceid=%s' % ceid) + ) # ceid includes a ':' character which must not be urlencoded params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' params['headers']['Cookie'] = "CONSENT=YES+cb.%s-14-p0.en+F+941;" % datetime.now().strftime("%Y%m%d") return params @@ -141,7 +147,7 @@ def response(resp): # jslog="95014; 5:W251bGwsbnVsbCxudW...giXQ==; track:click" jslog = jslog.split(";")[1].split(':')[1].strip() try: - padding = (4 -(len(jslog) % 4)) * "=" + padding = (4 - (len(jslog) % 4)) * "=" jslog = b64decode(jslog + padding) except binascii.Error: # URL cant be read, skip this result @@ -178,12 +184,14 @@ def response(resp): img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src')) - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'img_src': img_src, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'img_src': img_src, + } + ) # return results return results diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index e6726463d..e0700957c 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -32,6 +32,7 @@ from searx.engines.google import ( supported_languages_url, _fetch_supported_languages, ) + # pylint: enable=unused-import # about @@ -52,6 +53,7 @@ use_locale_domain = True time_range_support = True safesearch = False + def time_range_url(params): """Returns a URL query component for a google-Scholar time range based on ``params['time_range']``. Google-Scholar does only support ranges in years. @@ -64,7 +66,7 @@ def time_range_url(params): # as_ylo=2016&as_yhi=2019 ret_val = '' if params['time_range'] in time_range_dict: - ret_val= urlencode({'as_ylo': datetime.now().year -1 }) + ret_val = urlencode({'as_ylo': datetime.now().year - 1}) return '&' + ret_val @@ -72,34 +74,38 @@ def request(query, params): """Google-Scholar search request""" offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info( - params, supported_languages, language_aliases, False - ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) # subdomain is: scholar.google.xy lang_info['subdomain'] = lang_info['subdomain'].replace("www.", "scholar.") - query_url = 'https://'+ lang_info['subdomain'] + '/scholar' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start' : offset, - }) + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/scholar' + + "?" 
+ + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'start': offset, + } + ) + ) query_url += time_range_url(params) params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - #params['google_subdomain'] = subdomain + # params['google_subdomain'] = subdomain return params + def response(resp): """Get response from google's search request""" results = [] @@ -132,11 +138,13 @@ def response(resp): if pub_type: title = title + " " + pub_type - results.append({ - 'url': url, - 'title': title, - 'content': content, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + } + ) # parse suggestion for suggestion in eval_xpath(dom, '//div[contains(@class, "gs_qsuggest_wrap")]//li//a'): diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 77b0ab260..049f9138c 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -38,10 +38,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -65,6 +63,7 @@ safesearch = True RE_CACHE = {} + def _re(regexpr): """returns compiled regular expression""" RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) @@ -77,18 +76,17 @@ def scrap_out_thumbs_src(dom): for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): _script = script.text # "dimg_35":"https://i.ytimg.c....", - _dimurl = _re("s='([^']*)").findall( _script) - for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script): - v = v.replace(r'\u003d','=') - v = v.replace(r'\u0026','&') + _dimurl = _re("s='([^']*)").findall(_script) + for k, v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)').findall(_script): + v = v.replace(r'\u003d', '=') + v = v.replace(r'\u0026', '&') ret_val[k] = v logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) return ret_val def scrap_out_thumbs(dom): - """Scrap out thumbnail data from <script> tags. - """ + """Scrap out thumbnail data from <script> tags.""" ret_val = {} thumb_name = 'dimg_' @@ -96,7 +94,7 @@ def scrap_out_thumbs(dom): _script = script.text # var s='data:image/jpeg;base64, ...' - _imgdata = _re("s='([^']*)").findall( _script) + _imgdata = _re("s='([^']*)").findall(_script) if not _imgdata: continue @@ -112,19 +110,24 @@ def scrap_out_thumbs(dom): def request(query, params): """Google-Video search request""" - lang_info = get_lang_info( - params, supported_languages, language_aliases, False + lang_info = get_lang_info(params, supported_languages, language_aliases, False) + logger.debug("HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) + + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + 'tbm': "vid", + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + } + ) ) - logger.debug( - "HTTP header Accept-Language --> %s", lang_info['headers']['Accept-Language']) - - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" 
+ urlencode({ - 'q': query, - 'tbm': "vid", - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - }) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -133,9 +136,7 @@ def request(query, params): params['url'] = query_url params['headers'].update(lang_info['headers']) - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -171,21 +172,22 @@ def response(resp): title = extract_text(eval_xpath_getindex(result, title_xpath, 0)) url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0) - length = extract_text(eval_xpath( - result, './/div[contains(@class, "P7xzyf")]/span/span')) + length = extract_text(eval_xpath(result, './/div[contains(@class, "P7xzyf")]/span/span')) c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0) content = extract_text(c_node) pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]')) - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'length': length, - 'author': pub_info, - 'thumbnail': img_src, - 'template': 'videos.html', - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'length': length, + 'author': pub_info, + 'thumbnail': img_src, + 'template': 'videos.html', + } + ) # parse suggestion for suggestion in eval_xpath_list(dom, suggestion_xpath): diff --git a/searx/engines/imdb.py b/searx/engines/imdb.py index a7474fd5b..bb6258cf4 100644 --- a/searx/engines/imdb.py +++ b/searx/engines/imdb.py @@ -27,7 +27,9 @@ about = { "results": 'HTML', } -categories = ['general', ] +categories = [ + 'general', +] paging = False # suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json" @@ -35,13 +37,7 @@ suggestion_url = "https://v2.sg.media-imdb.com/suggestion/{letter}/{query}.json" href_base = 'https://imdb.com/{category}/{entry_id}' -search_categories = { - "nm": "name", - "tt": "title", - "kw": "keyword", - "co": "company", - "ep": "episode" -} +search_categories = {"nm": "name", "tt": "title", "kw": "keyword", "co": "company", "ep": "episode"} def request(query, params): @@ -63,9 +59,7 @@ def response(resp): entry_id = entry['id'] categ = search_categories.get(entry_id[:2]) if categ is None: - logger.error( - 'skip unknown category tag %s in %s', entry_id[:2], entry_id - ) + logger.error('skip unknown category tag %s in %s', entry_id[:2], entry_id) continue title = entry['l'] @@ -95,11 +89,13 @@ def response(resp): if not image_url_name.endswith('_V1_'): magic = '_V1_' + magic image_url = image_url_name + magic + '.' 
+ image_url_prefix - results.append({ - "title": title, - "url": href_base.format(category=categ, entry_id=entry_id), - "content": content, - "img_src" : image_url, - }) + results.append( + { + "title": title, + "url": href_base.format(category=categ, entry_id=entry_id), + "content": content, + "img_src": image_url, + } + ) return results diff --git a/searx/engines/ina.py b/searx/engines/ina.py index 81172ef8c..1e21bcef8 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -41,9 +41,7 @@ content_xpath = './/p[@class="media-body__summary"]' # do search-request def request(query, params): - params['url'] = search_url.format(ps=page_size, - start=params['pageno'] * page_size, - query=urlencode({'q': query})) + params['url'] = search_url.format(ps=page_size, start=params['pageno'] * page_size, query=urlencode({'q': query})) return params @@ -75,12 +73,16 @@ def response(resp): content = extract_text(result.xpath(content_xpath)) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index 5d0b00edb..914615d6f 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -46,14 +46,10 @@ def request(query, params): base_url_rand = base_url search_url = base_url_rand + "api/v1/search?q={query}" - params["url"] = search_url.format( - query=quote_plus(query) - ) + "&page={pageno}".format(pageno=params["pageno"]) + params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"]) if params["time_range"] in time_range_dict: - params["url"] += "&date={timerange}".format( - timerange=time_range_dict[params["time_range"]] - ) + params["url"] += "&date={timerange}".format(timerange=time_range_dict[params["time_range"]]) if params["language"] != "all": lang = params["language"].split("-") @@ -88,17 +84,13 @@ def response(resp): url = base_invidious_url + videoid embedded = embedded_url.format(videoid=videoid) thumbs = result.get("videoThumbnails", []) - thumb = next( - (th for th in thumbs if th["quality"] == "sddefault"), None - ) + thumb = next((th for th in thumbs if th["quality"] == "sddefault"), None) if thumb: thumbnail = thumb.get("url", "") else: thumbnail = "" - publishedDate = parser.parse( - time.ctime(result.get("published", 0)) - ) + publishedDate = parser.parse(time.ctime(result.get("published", 0))) length = time.gmtime(result.get("lengthSeconds")) if length.tm_hour: length = time.strftime("%H:%M:%S", length) diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 8a04d34b2..f53bc0bf4 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -119,22 +119,22 @@ def response(resp): content = query(result, content_query)[0] except: content = "" - results.append({ - 'url': to_string(url), - 'title': title_filter(to_string(title)), - 'content': content_filter(to_string(content)), - }) + results.append( + { + 'url': to_string(url), + 'title': title_filter(to_string(title)), + 'content': content_filter(to_string(content)), + } + ) else: - for url, title, content in zip( - query(json, url_query), - query(json, title_query), - query(json, content_query) - ): - results.append({ - 'url': 
to_string(url), - 'title': title_filter(to_string(title)), - 'content': content_filter(to_string(content)), - }) + for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)): + results.append( + { + 'url': to_string(url), + 'title': title_filter(to_string(title)), + 'content': content_filter(to_string(content)), + } + ) if not suggestion_query: return results diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index ad451dbb1..26364674c 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -34,8 +34,7 @@ content_xpath = './/span[@class="font11px lightgrey block"]' # do search-request def request(query, params): - params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno']) + params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno']) return params @@ -79,16 +78,20 @@ def response(resp): torrentfileurl = quote(torrentfile, safe="%/:=&?~#+!$,;'@()*") # append result - results.append({'url': href, - 'title': title, - 'content': content, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'files': files, - 'magnetlink': magnetlink, - 'torrentfile': torrentfileurl, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'files': files, + 'magnetlink': magnetlink, + 'torrentfile': torrentfileurl, + 'template': 'torrent.html', + } + ) # return results sorted by seeder return sorted(results, key=itemgetter('seed'), reverse=True) diff --git a/searx/engines/loc.py b/searx/engines/loc.py index 5c09ceff2..0b2f3a689 100644 --- a/searx/engines/loc.py +++ b/searx/engines/loc.py @@ -34,9 +34,7 @@ IMG_SRC_FIXES = { def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) params['url'] = base_url + search_path @@ -56,13 +54,15 @@ def response(resp): break else: img_src = result['image']['thumb'] - results.append({ - 'url': result['links']['item'], - 'title': result['title'], - 'img_src': img_src, - 'thumbnail_src': result['image']['thumb'], - 'author': result['creator'], - 'template': 'images.html' - }) + results.append( + { + 'url': result['links']['item'], + 'title': result['title'], + 'img_src': img_src, + 'thumbnail_src': result['image']['thumb'], + 'author': result['creator'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/mediathekviewweb.py b/searx/engines/mediathekviewweb.py index d4cb853d4..991dcbc7b 100644 --- a/searx/engines/mediathekviewweb.py +++ b/searx/engines/mediathekviewweb.py @@ -22,29 +22,33 @@ paging = True time_range_support = False safesearch = False + def request(query, params): params['url'] = 'https://mediathekviewweb.de/api/query' params['method'] = 'POST' params['headers']['Content-type'] = 'text/plain' - params['data'] = dumps({ - 'queries' : [ - { - 'fields' : [ - 'title', - 'topic', - ], - 'query' : query - }, - ], - 'sortBy' : 'timestamp', - 'sortOrder' : 'desc', - 'future' : True, - 'offset' : (params['pageno'] - 1 )* 10, - 'size' : 10 - }) + params['data'] = dumps( + { + 'queries': [ + { + 'fields': [ + 'title', + 'topic', + ], + 'query': query, + }, + ], + 'sortBy': 'timestamp', + 'sortOrder': 'desc', + 'future': True, + 'offset': (params['pageno'] - 1) * 10, + 'size': 10, + } + ) return params + def response(resp): resp = loads(resp.text) 
@@ -58,11 +62,13 @@ def response(resp): item['hms'] = str(datetime.timedelta(seconds=item['duration'])) - results.append({ - 'url' : item['url_video_hd'], - 'title' : "%(channel)s: %(title)s (%(hms)s)" % item, - 'length' : item['hms'], - 'content' : "%(description)s" % item, - }) + results.append( + { + 'url': item['url_video_hd'], + 'title': "%(channel)s: %(title)s (%(hms)s)" % item, + 'length': item['hms'], + 'content': "%(description)s" % item, + } + ) return results diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index da4321250..9002e9ba7 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -25,23 +25,24 @@ search_type = 'nearmatch' # possible values: title, text, nearmatch # search-url base_url = 'https://{language}.wikipedia.org/' -search_postfix = 'w/api.php?action=query'\ - '&list=search'\ - '&{query}'\ - '&format=json'\ - '&sroffset={offset}'\ - '&srlimit={limit}'\ +search_postfix = ( + 'w/api.php?action=query' + '&list=search' + '&{query}' + '&format=json' + '&sroffset={offset}' + '&srlimit={limit}' '&srwhat={searchtype}' +) # do search-request def request(query, params): offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'srsearch': query}), - offset=offset, - limit=number_of_results, - searchtype=search_type) + string_args = dict( + query=urlencode({'srsearch': query}), offset=offset, limit=number_of_results, searchtype=search_type + ) format_strings = list(Formatter().parse(base_url)) @@ -78,13 +79,14 @@ def response(resp): for result in search_results['query']['search']: if result.get('snippet', '').startswith('#REDIRECT'): continue - url = base_url.format(language=resp.search_params['language']) +\ - 'wiki/' + quote(result['title'].replace(' ', '_').encode()) + url = ( + base_url.format(language=resp.search_params['language']) + + 'wiki/' + + quote(result['title'].replace(' ', '_').encode()) + ) # append result - results.append({'url': url, - 'title': result['title'], - 'content': ''}) + results.append({'url': url, 'title': result['title'], 'content': ''}) # return results return results diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index c99611049..a869daf2f 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -26,17 +26,19 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['content-type'] = 'application/json; charset=utf-8' - params['data'] = dumps({ - 'query': query, - 'queryExpression': '', - 'filters': [], - 'orderBy': 0, - 'skip': (params['pageno'] - 1) * 10, - 'sortAscending': True, - 'take': 10, - 'includeCitationContexts': False, - 'profileId': '', - }) + params['data'] = dumps( + { + 'query': query, + 'queryExpression': '', + 'filters': [], + 'orderBy': 0, + 'skip': (params['pageno'] - 1) * 10, + 'sortAscending': True, + 'take': 10, + 'includeCitationContexts': False, + 'profileId': '', + } + ) return params @@ -54,11 +56,13 @@ def response(resp): title = result['paper']['dn'] content = _get_content(result['paper']) url = _paper_url.format(id=result['paper']['id']) - results.append({ - 'url': url, - 'title': html_to_text(title), - 'content': html_to_text(content), - }) + results.append( + { + 'url': url, + 'title': html_to_text(title), + 'content': html_to_text(content), + } + ) return results diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index a6fd1c0a1..f5e0f55fc 100644 --- a/searx/engines/mixcloud.py +++ 
b/searx/engines/mixcloud.py @@ -25,16 +25,17 @@ paging = True url = 'https://api.mixcloud.com/' search_url = url + 'search/?{query}&type=cloudcast&limit=10&offset={offset}' -embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' +\ - 'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>' +embedded_url = ( + '<iframe scrolling="no" frameborder="0" allowTransparency="true" ' + + 'data-src="https://www.mixcloud.com/widget/iframe/?feed={url}" width="300" height="300"></iframe>' +) # do search-request def request(query, params): offset = (params['pageno'] - 1) * 10 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) return params @@ -54,11 +55,9 @@ def response(resp): publishedDate = parser.parse(result['created_time']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'publishedDate': publishedDate, - 'content': content}) + results.append( + {'url': url, 'title': title, 'embedded': embedded, 'publishedDate': publishedDate, 'content': content} + ) # return results return results diff --git a/searx/engines/mongodb.py b/searx/engines/mongodb.py index 2ebb90539..c833ca9e0 100644 --- a/searx/engines/mongodb.py +++ b/searx/engines/mongodb.py @@ -26,38 +26,35 @@ result_template = 'key-value.html' _client = None + def init(_): connect() + def connect(): global _client # pylint: disable=global-statement - kwargs = { 'port': port } + kwargs = {'port': port} if username: kwargs['username'] = username if password: kwargs['password'] = password _client = MongoClient(host, **kwargs)[database][collection] + def search(query, params): results = [] if exact_match_only: - q = { '$eq': query } + q = {'$eq': query} else: - _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M ) - q = { '$regex': _re } + _re = re.compile('.*{0}.*'.format(re.escape(query)), re.I | re.M) + q = {'$regex': _re} - query = _client.find( - {key: q} - ).skip( - ( params['pageno'] -1 ) * results_per_page - ).limit( - results_per_page - ) + query = _client.find({key: q}).skip((params['pageno'] - 1) * results_per_page).limit(results_per_page) - results.append({ 'number_of_results': query.count() }) + results.append({'number_of_results': query.count()}) for r in query: del r['_id'] - r = { str(k):str(v) for k,v in r.items() } + r = {str(k): str(v) for k, v in r.items()} r['template'] = result_template results.append(r) diff --git a/searx/engines/mysql_server.py b/searx/engines/mysql_server.py index be89eb86e..d949ee0bc 100644 --- a/searx/engines/mysql_server.py +++ b/searx/engines/mysql_server.py @@ -20,6 +20,7 @@ paging = True result_template = 'key-value.html' _connection = None + def init(engine_settings): global _connection # pylint: disable=global-statement @@ -30,13 +31,14 @@ def init(engine_settings): raise ValueError('only SELECT query is supported') _connection = mysql.connector.connect( - database = database, - user = username, - password = password, - host = host, + database=database, + user=username, + password=password, + host=host, auth_plugin=auth_plugin, ) + def search(query, params): query_params = {'query': query} query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) @@ -46,6 +48,7 @@ def search(query, params): return _fetch_results(cur) + def _fetch_results(cur): results = [] for res in cur: diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py 
index 4fe383efa..bdd3ea6dc 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -98,14 +98,18 @@ def response(resp): content = 'Category: "{category}". Downloaded {downloads} times.' content = content.format(category=category, downloads=downloads) - results.append({'url': href, - 'title': title, - 'content': content, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'torrentfile': torrent_link, - 'magnetlink': magnet_link, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'torrentfile': torrent_link, + 'magnetlink': magnet_link, + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index c6211a004..946869834 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -151,10 +151,12 @@ def response(resp): user_language = resp.search_params['language'] if resp.search_params['route']: - results.append({ - 'answer': gettext('Get directions'), - 'url': route_url.format(*resp.search_params['route'].groups()), - }) + results.append( + { + 'answer': gettext('Get directions'), + 'url': route_url.format(*resp.search_params['route'].groups()), + } + ) fetch_wikidata(nominatim_json, user_language) @@ -170,26 +172,26 @@ def response(resp): links, link_keys = get_links(result, user_language) data = get_data(result, user_language, link_keys) - results.append({ - 'template': 'map.html', - 'title': title, - 'address': address, - 'address_label': get_key_label('addr', user_language), - 'url': url, - 'osm': osm, - 'geojson': geojson, - 'img_src': img_src, - 'links': links, - 'data': data, - 'type': get_tag_label( - result.get('category'), result.get('type', ''), user_language - ), - 'type_icon': result.get('icon'), - 'content': '', - 'longitude': result['lon'], - 'latitude': result['lat'], - 'boundingbox': result['boundingbox'], - }) + results.append( + { + 'template': 'map.html', + 'title': title, + 'address': address, + 'address_label': get_key_label('addr', user_language), + 'url': url, + 'osm': osm, + 'geojson': geojson, + 'img_src': img_src, + 'links': links, + 'data': data, + 'type': get_tag_label(result.get('category'), result.get('type', ''), user_language), + 'type_icon': result.get('icon'), + 'content': '', + 'longitude': result['lon'], + 'latitude': result['lat'], + 'boundingbox': result['boundingbox'], + } + ) return results @@ -270,9 +272,9 @@ def get_title_address(result): # https://github.com/osm-search/Nominatim/issues/1662 address_name = address_raw.get('address29') else: - address_name = address_raw.get(result['category']) + address_name = address_raw.get(result['category']) elif result['type'] in address_raw: - address_name = address_raw.get(result['type']) + address_name = address_raw.get(result['type']) # add rest of adressdata, if something is already found if address_name: @@ -297,8 +299,7 @@ def get_title_address(result): def get_url_osm_geojson(result): - """Get url, osm and geojson - """ + """Get url, osm and geojson""" osm_type = result.get('osm_type', result.get('type')) if 'osm_id' not in result: # see https://github.com/osm-search/Nominatim/issues/1521 @@ -349,11 +350,13 @@ def get_links(result, user_language): url, url_label = mapping_function(raw_value) if url.startswith('https://wikidata.org'): url_label = result.get('wikidata', {}).get('itemLabel') or url_label - links.append({ - 'label': get_key_label(k, user_language), - 'url': url, - 
'url_label': url_label, - }) + links.append( + { + 'label': get_key_label(k, user_language), + 'url': url, + 'url_label': url_label, + } + ) link_keys.add(k) return links, link_keys @@ -373,11 +376,13 @@ def get_data(result, user_language, ignore_keys): continue k_label = get_key_label(k, user_language) if k_label: - data.append({ - 'label': k_label, - 'key': k, - 'value': v, - }) + data.append( + { + 'label': k_label, + 'key': k, + 'value': v, + } + ) data.sort(key=lambda entry: (get_key_rank(entry['key']), entry['label'])) return data diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py index b9bbfaf1b..34c8d3227 100644 --- a/searx/engines/pdbe.py +++ b/searx/engines/pdbe.py @@ -34,10 +34,7 @@ def request(query, params): params['url'] = pdbe_solr_url params['method'] = 'POST' - params['data'] = { - 'q': query, - 'wt': "json" # request response in parsable format - } + params['data'] = {'q': query, 'wt': "json"} # request response in parsable format return params @@ -53,12 +50,21 @@ def construct_body(result): if result['journal']: content = content.format( title=result['citation_title'], - authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'], - page=result['journal_page'], year=result['citation_year']) + authors=result['entry_author_list'][0], + journal=result['journal'], + volume=result['journal_volume'], + page=result['journal_page'], + year=result['citation_year'], + ) else: content = content.format( title=result['citation_title'], - authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year']) + authors=result['entry_author_list'][0], + journal='', + volume='', + page='', + year=result['release_year'], + ) img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) except (KeyError): content = None @@ -96,20 +102,21 @@ def response(resp): # since we can't construct a proper body from the response, we'll make up our own msg_superseded = gettext("This entry has been superseded by") content = '{msg_superseded}: {url} ({pdb_id})'.format( - msg_superseded=msg_superseded, - url=superseded_url, - pdb_id=result['superseded_by']) + msg_superseded=msg_superseded, url=superseded_url, pdb_id=result['superseded_by'] + ) # obsoleted entries don't have preview images img_src = None else: title, content, img_src = construct_body(result) - results.append({ - 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), - 'title': title, - 'content': content, - 'img_src': img_src - }) + results.append( + { + 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), + 'title': title, + 'content': content, + 'img_src': img_src, + } + ) return results diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index f9cd50be1..1ace14027 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -36,9 +36,7 @@ def request(query, params): language = params["language"].split("-")[0] if "all" != language and language in supported_languages: query_dict["languageOneOf"] = language - params["url"] = search_url.format( - query=urlencode(query_dict), pageno=pageno - ) + params["url"] = search_url.format(query=urlencode(query_dict), pageno=pageno) return params diff --git a/searx/engines/photon.py b/searx/engines/photon.py index f85dcad86..16ea88194 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -33,9 +33,7 @@ supported_languages = ['de', 'en', 'fr', 'it'] # do search-request def request(query, params): - params['url'] = base_url +\ - search_string.format(query=urlencode({'q': query}), - 
limit=number_of_results) + params['url'] = base_url + search_string.format(query=urlencode({'q': query}), limit=number_of_results) if params['language'] != 'all': language = params['language'].split('_')[0] @@ -75,59 +73,71 @@ def response(resp): # continue if invalide osm-type continue - url = result_base_url.format(osm_type=osm_type, - osm_id=properties.get('osm_id')) + url = result_base_url.format(osm_type=osm_type, osm_id=properties.get('osm_id')) - osm = {'type': osm_type, - 'id': properties.get('osm_id')} + osm = {'type': osm_type, 'id': properties.get('osm_id')} geojson = r.get('geometry') if properties.get('extent'): - boundingbox = [properties.get('extent')[3], - properties.get('extent')[1], - properties.get('extent')[0], - properties.get('extent')[2]] + boundingbox = [ + properties.get('extent')[3], + properties.get('extent')[1], + properties.get('extent')[0], + properties.get('extent')[2], + ] else: # TODO: better boundingbox calculation - boundingbox = [geojson['coordinates'][1], - geojson['coordinates'][1], - geojson['coordinates'][0], - geojson['coordinates'][0]] + boundingbox = [ + geojson['coordinates'][1], + geojson['coordinates'][1], + geojson['coordinates'][0], + geojson['coordinates'][0], + ] # address calculation address = {} # get name - if properties.get('osm_key') == 'amenity' or\ - properties.get('osm_key') == 'shop' or\ - properties.get('osm_key') == 'tourism' or\ - properties.get('osm_key') == 'leisure': + if ( + properties.get('osm_key') == 'amenity' + or properties.get('osm_key') == 'shop' + or properties.get('osm_key') == 'tourism' + or properties.get('osm_key') == 'leisure' + ): address = {'name': properties.get('name')} # add rest of adressdata, if something is already found if address.get('name'): - address.update({'house_number': properties.get('housenumber'), - 'road': properties.get('street'), - 'locality': properties.get('city', - properties.get('town', # noqa - properties.get('village'))), # noqa - 'postcode': properties.get('postcode'), - 'country': properties.get('country')}) + address.update( + { + 'house_number': properties.get('housenumber'), + 'road': properties.get('street'), + 'locality': properties.get( + 'city', properties.get('town', properties.get('village')) # noqa + ), # noqa + 'postcode': properties.get('postcode'), + 'country': properties.get('country'), + } + ) else: address = None # append result - results.append({'template': 'map.html', - 'title': title, - 'content': '', - 'longitude': geojson['coordinates'][0], - 'latitude': geojson['coordinates'][1], - 'boundingbox': boundingbox, - 'geojson': geojson, - 'address': address, - 'osm': osm, - 'url': url}) + results.append( + { + 'template': 'map.html', + 'title': title, + 'content': '', + 'longitude': geojson['coordinates'][0], + 'latitude': geojson['coordinates'][1], + 'boundingbox': boundingbox, + 'geojson': geojson, + 'address': address, + 'osm': osm, + 'url': url, + } + ) # return results return results diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index d4b94ecfa..4b0984be5 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -40,17 +40,14 @@ trackers = [ ] # piratebay specific type-definitions -search_types = {"files": "0", - "music": "100", - "videos": "200"} +search_types = {"files": "0", "music": "100", "videos": "200"} # do search-request def request(query, params): search_type = search_types.get(params["category"], "0") - params["url"] = search_url.format(search_term=quote(query), - search_type=search_type) + params["url"] = 
search_url.format(search_term=quote(query), search_type=search_type) return params @@ -68,8 +65,9 @@ def response(resp): # parse results for result in search_res: link = url + "description.php?id=" + result["id"] - magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\ - + "&tr=" + "&tr=".join(trackers) + magnetlink = ( + "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) + ) params = { "url": link, @@ -77,7 +75,7 @@ def response(resp): "seed": result["seeders"], "leech": result["leechers"], "magnetlink": magnetlink, - "template": "torrent.html" + "template": "torrent.html", } # extract and convert creation date diff --git a/searx/engines/postgresql.py b/searx/engines/postgresql.py index 1eddcd519..d8bbabe27 100644 --- a/searx/engines/postgresql.py +++ b/searx/engines/postgresql.py @@ -20,6 +20,7 @@ paging = True result_template = 'key-value.html' _connection = None + def init(engine_settings): global _connection # pylint: disable=global-statement @@ -30,25 +31,24 @@ def init(engine_settings): raise ValueError('only SELECT query is supported') _connection = psycopg2.connect( - database = database, - user = username, - password = password, - host = host, - port = port, + database=database, + user=username, + password=password, + host=host, + port=port, ) + def search(query, params): query_params = {'query': query} - query_to_run = ( - query_str - + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) - ) + query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) with _connection: with _connection.cursor() as cur: cur.execute(query_to_run, query_params) return _fetch_results(cur) + def _fetch_results(cur): results = [] titles = [] diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 5d88d398e..27444ae24 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -15,7 +15,7 @@ about = { "wikidata_id": 'Q1540899', "official_api_documentation": { 'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/', - 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/' + 'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/', }, "use_official_api": True, "require_api_key": False, @@ -24,8 +24,9 @@ about = { categories = ['science'] -base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'\ - + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' +base_url = ( + 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' +) # engine dependent config number_of_results = 10 @@ -36,9 +37,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'term': query}), - offset=offset, - hits=number_of_results) + string_args = dict(query=urlencode({'term': query}), offset=offset, hits=number_of_results) params['url'] = base_url.format(**string_args) @@ -49,8 +48,9 @@ def response(resp): results = [] # First retrieve notice of each result - pubmed_retrieve_api_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\ - + 'db=pubmed&retmode=xml&id={pmids_string}' + pubmed_retrieve_api_url = ( + 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' 
+ 'db=pubmed&retmode=xml&id={pmids_string}' + ) pmids_results = etree.XML(resp.content) pmids = pmids_results.xpath('//eSearchResult/IdList/Id') @@ -88,14 +88,17 @@ def response(resp): content = content[0:300] + "..." # TODO: center snippet on query term - res_dict = {'url': url, - 'title': title, - 'content': content} + res_dict = {'url': url, 'title': title, 'content': content} try: - publishedDate = datetime.strptime(entry.xpath('.//DateCreated/Year')[0].text - + '-' + entry.xpath('.//DateCreated/Month')[0].text - + '-' + entry.xpath('.//DateCreated/Day')[0].text, '%Y-%m-%d') + publishedDate = datetime.strptime( + entry.xpath('.//DateCreated/Year')[0].text + + '-' + + entry.xpath('.//DateCreated/Month')[0].text + + '-' + + entry.xpath('.//DateCreated/Day')[0].text, + '%Y-%m-%d', + ) res_dict['publishedDate'] = publishedDate except: pass diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 0312e518c..a1799491a 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -61,6 +61,7 @@ category_to_keyword = { # search-url url = 'https://api.qwant.com/v3/search/{keyword}?{query}&count={count}&offset={offset}' + def request(query, params): """Qwant search request""" keyword = category_to_keyword[categories[0]] @@ -77,10 +78,10 @@ def request(query, params): offset = min(offset, 40) params['url'] = url.format( - keyword = keyword, - query = urlencode({'q': query}), - offset = offset, - count = count, + keyword=keyword, + query=urlencode({'q': query}), + offset=offset, + count=count, ) # add language tag @@ -111,7 +112,14 @@ def response(resp): # check for an API error if search_results.get('status') != 'success': - msg = ",".join(data.get('message', ['unknown', ])) + msg = ",".join( + data.get( + 'message', + [ + 'unknown', + ], + ) + ) raise SearxEngineAPIException('API error::' + msg) # raise for other errors @@ -128,7 +136,7 @@ def response(resp): # result['items']. mainline = data.get('result', {}).get('items', []) mainline = [ - {'type' : keyword, 'items' : mainline }, + {'type': keyword, 'items': mainline}, ] # return empty array if there are no results @@ -153,11 +161,13 @@ def response(resp): if mainline_type == 'web': content = item['desc'] - results.append({ - 'title': title, - 'url': res_url, - 'content': content, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'content': content, + } + ) elif mainline_type == 'news': @@ -168,23 +178,27 @@ def response(resp): img_src = None if news_media: img_src = news_media[0].get('pict', {}).get('url', None) - results.append({ - 'title': title, - 'url': res_url, - 'publishedDate': pub_date, - 'img_src': img_src, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'publishedDate': pub_date, + 'img_src': img_src, + } + ) elif mainline_type == 'images': thumbnail = item['thumbnail'] img_src = item['media'] - results.append({ - 'title': title, - 'url': res_url, - 'template': 'images.html', - 'thumbnail_src': thumbnail, - 'img_src': img_src, - }) + results.append( + { + 'title': title, + 'url': res_url, + 'template': 'images.html', + 'thumbnail_src': thumbnail, + 'img_src': img_src, + } + ) elif mainline_type == 'videos': # some videos do not have a description: while qwant-video @@ -208,19 +222,18 @@ def response(resp): thumbnail = item['thumbnail'] # from some locations (DE and others?) the s2 link do # response a 'Please wait ..' 
but does not deliver the thumbnail - thumbnail = thumbnail.replace( - 'https://s2.qwant.com', - 'https://s1.qwant.com', 1 + thumbnail = thumbnail.replace('https://s2.qwant.com', 'https://s1.qwant.com', 1) + results.append( + { + 'title': title, + 'url': res_url, + 'content': content, + 'publishedDate': pub_date, + 'thumbnail': thumbnail, + 'template': 'videos.html', + 'length': length, + } ) - results.append({ - 'title': title, - 'url': res_url, - 'content': content, - 'publishedDate': pub_date, - 'thumbnail': thumbnail, - 'template': 'videos.html', - 'length': length, - }) return results @@ -229,8 +242,8 @@ def response(resp): def _fetch_supported_languages(resp): # list of regions is embedded in page as a js object response_text = resp.text - response_text = response_text[response_text.find('INITIAL_PROPS'):] - response_text = response_text[response_text.find('{'):response_text.find('</script>')] + response_text = response_text[response_text.find('INITIAL_PROPS') :] + response_text = response_text[response_text.find('{') : response_text.find('</script>')] regions_json = loads(response_text) diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index 42f2858d7..ebcd83b8d 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -28,18 +28,12 @@ mount_prefix = None dl_prefix = None # embedded -embedded_url = '<{ttype} controls height="166px" ' +\ - 'src="{url}" type="{mtype}"></{ttype}>' +embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}"></{ttype}>' # helper functions def get_time_range(time_range): - sw = { - 'day': 1, - 'week': 7, - 'month': 30, - 'year': 365 - } + sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} offset = sw.get(time_range, 0) if not offset: @@ -52,11 +46,9 @@ def get_time_range(time_range): def request(query, params): search_after = get_time_range(params['time_range']) search_url = base_url + 'json?{query}&highlight=0' - params['url'] = search_url.format(query=urlencode({ - 'query': query, - 'page': params['pageno'], - 'after': search_after, - 'dir': search_dir})) + params['url'] = search_url.format( + query=urlencode({'query': query, 'page': params['pageno'], 'after': search_after, 'dir': search_dir}) + ) return params @@ -76,10 +68,7 @@ def response(resp): content = '{}'.format(result['snippet']) # append result - item = {'url': url, - 'title': title, - 'content': content, - 'template': 'files.html'} + item = {'url': url, 'title': title, 'content': content, 'template': 'files.html'} if result['size']: item['size'] = int(result['size']) @@ -96,9 +85,8 @@ def response(resp): if mtype in ['audio', 'video']: item['embedded'] = embedded_url.format( - ttype=mtype, - url=quote(url.encode('utf8'), '/:'), - mtype=result['mtype']) + ttype=mtype, url=quote(url.encode('utf8'), '/:'), mtype=result['mtype'] + ) if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']: item['img_src'] = url diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index ca6cb28a8..36d92339d 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -52,10 +52,7 @@ def response(resp): data = post['data'] # extract post information - params = { - 'url': urljoin(base_url, data['permalink']), - 'title': data['title'] - } + params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']} # if thumbnail field contains a valid URL, we need to change template thumbnail = data['thumbnail'] diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py index f9726033d..03786f81d 100644 --- 
a/searx/engines/redis_server.py +++ b/searx/engines/redis_server.py @@ -20,16 +20,19 @@ result_template = 'key-value.html' exact_match_only = True _redis_client = None + + def init(_engine_settings): global _redis_client # pylint: disable=global-statement _redis_client = redis.StrictRedis( - host = host, - port = port, - db = db, - password = password or None, - decode_responses = True, + host=host, + port=port, + db=db, + password=password or None, + decode_responses=True, ) + def search(query, _params): if not exact_match_only: return search_keys(query) @@ -42,21 +45,20 @@ def search(query, _params): if ' ' in query: qset, rest = query.split(' ', 1) ret = [] - for res in _redis_client.hscan_iter( - qset, match='*{}*'.format(rest) - ): - ret.append({ - res[0]: res[1], - 'template': result_template, - }) + for res in _redis_client.hscan_iter(qset, match='*{}*'.format(rest)): + ret.append( + { + res[0]: res[1], + 'template': result_template, + } + ) return ret return [] + def search_keys(query): ret = [] - for key in _redis_client.scan_iter( - match='*{}*'.format(query) - ): + for key in _redis_client.scan_iter(match='*{}*'.format(query)): key_type = _redis_client.type(key) res = None diff --git a/searx/engines/rumble.py b/searx/engines/rumble.py index 407142467..beca2570c 100644 --- a/searx/engines/rumble.py +++ b/searx/engines/rumble.py @@ -68,14 +68,16 @@ def response(resp): else: content = f"{views} views - {rumbles} rumbles" - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': fixed_date, - 'thumbnail': thumbnail, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': fixed_date, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 51c925247..ad27079dd 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -32,12 +32,16 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['Content-type'] = "application/json" - params['data'] = dumps({"query": query, - "searchField": "ALL", - "sortDirection": "ASC", - "sortOrder": "RELEVANCY", - "page": params['pageno'], - "pageSize": page_size}) + params['data'] = dumps( + { + "query": query, + "searchField": "ALL", + "sortDirection": "ASC", + "sortOrder": "RELEVANCY", + "page": params['pageno'], + "pageSize": page_size, + } + ) return params @@ -69,11 +73,15 @@ def response(resp): content = result['highlights'][0]['value'] # append result - results.append({'url': url + 'structure/' + result['id'], - 'title': result['label'], - # 'thumbnail': thumbnail, - 'img_src': thumbnail, - 'content': html_to_text(content)}) + results.append( + { + 'url': url + 'structure/' + result['id'], + 'title': result['label'], + # 'thumbnail': thumbnail, + 'img_src': thumbnail, + 'content': html_to_text(content), + } + ) # return results return results diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 8c1330d98..a4b0308f9 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -25,10 +25,7 @@ url = 'https://searchcode.com/' search_url = url + 'api/codesearch_I/?{query}&p={pageno}' # special code-endings which are not recognised by the file ending -code_endings = {'cs': 'c#', - 'h': 'c', - 'hpp': 'cpp', - 'cxx': 'cpp'} 
+code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'} # do search-request @@ -55,17 +52,21 @@ def response(resp): lines[int(line)] = code code_language = code_endings.get( - result['filename'].split('.')[-1].lower(), - result['filename'].split('.')[-1].lower()) + result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower() + ) # append result - results.append({'url': href, - 'title': title, - 'content': '', - 'repository': repo, - 'codelines': sorted(lines.items()), - 'code_language': code_language, - 'template': 'code.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': '', + 'repository': repo, + 'codelines': sorted(lines.items()), + 'code_language': code_language, + 'template': 'code.html', + } + ) # return results return results diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 98ef0fb79..3e9035d6f 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -37,7 +37,7 @@ def request(query, params): 'language': params['language'], 'time_range': params['time_range'], 'category': params['category'], - 'format': 'json' + 'format': 'json', } return params diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index 297d0cf71..5d9d1a8e9 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -13,19 +13,21 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['content-type'] = 'application/json' - params['data'] = dumps({ - "queryString": query, - "page": params['pageno'], - "pageSize": 10, - "sort": "relevance", - "useFallbackRankerService": False, - "useFallbackSearchCluster": False, - "getQuerySuggestions": False, - "authors": [], - "coAuthors": [], - "venues": [], - "performTitleMatch": True, - }) + params['data'] = dumps( + { + "queryString": query, + "page": params['pageno'], + "pageSize": 10, + "sort": "relevance", + "useFallbackRankerService": False, + "useFallbackSearchCluster": False, + "getQuerySuggestions": False, + "authors": [], + "coAuthors": [], + "venues": [], + "performTitleMatch": True, + } + ) return params @@ -33,10 +35,12 @@ def response(resp): res = loads(resp.text) results = [] for result in res['results']: - results.append({ - 'url': result['primaryPaperLink']['url'], - 'title': result['title']['text'], - 'content': result['paperAbstractTruncated'] - }) + results.append( + { + 'url': result['primaryPaperLink']['url'], + 'title': result['title']['text'], + 'content': result['paperAbstractTruncated'], + } + ) return results diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index ebad20d01..00b1b3672 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -31,17 +31,13 @@ supported_languages = [ ] base_url = 'https://sepiasearch.org/api/v1/search/videos' -safesearch_table = { - 0: 'both', - 1: 'false', - 2: 'false' -} +safesearch_table = {0: 'both', 1: 'false', 2: 'false'} time_range_table = { 'day': relativedelta.relativedelta(), 'week': relativedelta.relativedelta(weeks=-1), 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1) + 'year': relativedelta.relativedelta(years=-1), } @@ -55,13 +51,19 @@ def minute_to_hm(minute): def request(query, params): - params['url'] = base_url + '?' 
+ urlencode({ - 'search': query, - 'start': (params['pageno'] - 1) * 10, - 'count': 10, - 'sort': '-match', - 'nsfw': safesearch_table[params['safesearch']] - }) + params['url'] = ( + base_url + + '?' + + urlencode( + { + 'search': query, + 'start': (params['pageno'] - 1) * 10, + 'count': 10, + 'sort': '-match', + 'nsfw': safesearch_table[params['safesearch']], + } + ) + ) language = params['language'].split('-')[0] if language in supported_languages: @@ -91,14 +93,18 @@ def response(resp): length = minute_to_hm(result.get('duration')) url = result['url'] - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 85cb25b7f..2e95b4769 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -58,10 +58,12 @@ def response(resp): if result_data is None: continue title_element = eval_xpath_getindex(result_element, './/h3/a', 0) - results.append({ - 'url': title_element.get('href'), - 'title': extract_text(title_element), - 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), - }) + results.append( + { + 'url': title_element.get('href'), + 'title': extract_text(title_element), + 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), + } + ) return results diff --git a/searx/engines/sjp.py b/searx/engines/sjp.py index 884fddd2d..ad498b847 100644 --- a/searx/engines/sjp.py +++ b/searx/engines/sjp.py @@ -28,9 +28,11 @@ URL = 'https://sjp.pwn.pl' SEARCH_URL = URL + '/szukaj/{query}.html' word_xpath = '//div[@class="query"]' -dict_xpath = ['//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', - '//div[@class="wyniki sjp-wyniki sjp-anchor"]', - '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]'] +dict_xpath = [ + '//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', + '//div[@class="wyniki sjp-wyniki sjp-anchor"]', + '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]', +] def request(query, params): @@ -85,9 +87,11 @@ def response(resp): infobox += "</ol>" infobox += "</ul></div>" - results.append({ - 'infobox': word, - 'content': infobox, - }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 7fbef9190..614b38277 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -36,14 +36,16 @@ def response(resp): search_results = loads(resp.text) for result in search_results["results"]: - results.append({ - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - }) + results.append( + { + 'infohash': result["infohash"], + 'seed': result["swarm"]["seeders"], + 'leech': result["swarm"]["leechers"], + 'title': result["title"], + 'url': "https://solidtorrents.net/view/" + result["_id"], + 'filesize': result["size"], + 'magnetlink': result["magnet"], + 
'template': "torrent.html", + } + ) return results diff --git a/searx/engines/solr.py b/searx/engines/solr.py index e26f19442..3e7846f8e 100644 --- a/searx/engines/solr.py +++ b/searx/engines/solr.py @@ -14,10 +14,10 @@ from searx.exceptions import SearxEngineAPIException base_url = 'http://localhost:8983' collection = '' rows = 10 -sort = '' # sorting: asc or desc -field_list = 'name' # list of field names to display on the UI -default_fields = '' # default field to query -query_fields = '' # query fields +sort = '' # sorting: asc or desc +field_list = 'name' # list of field names to display on the UI +default_fields = '' # default field to query +query_fields = '' # query fields _search_url = '' paging = True diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index d5bfc0f6f..004164e37 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -27,17 +27,21 @@ paging = True # search-url # missing attribute: user_id, app_version, app_locale url = 'https://api-v2.soundcloud.com/' -search_url = url + 'search?{query}'\ - '&variant_ids='\ - '&facet=model'\ - '&limit=20'\ - '&offset={offset}'\ - '&linked_partitioning=1'\ - '&client_id={client_id}' # noqa - -embedded_url = '<iframe width="100%" height="166" ' +\ - 'scrolling="no" frameborder="no" ' +\ - 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' +search_url = ( + url + 'search?{query}' + '&variant_ids=' + '&facet=model' + '&limit=20' + '&offset={offset}' + '&linked_partitioning=1' + '&client_id={client_id}' +) # noqa + +embedded_url = ( + '<iframe width="100%" height="166" ' + + 'scrolling="no" frameborder="no" ' + + 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' +) cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) guest_client_id = '' @@ -75,9 +79,7 @@ def init(engine_settings=None): def request(query, params): offset = (params['pageno'] - 1) * 20 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset, - client_id=guest_client_id) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, client_id=guest_client_id) return params @@ -98,11 +100,15 @@ def response(resp): embedded = embedded_url.format(uri=uri) # append result - results.append({'url': result['permalink_url'], - 'title': title, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'content': content}) + results.append( + { + 'url': result['permalink_url'], + 'title': title, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'content': content, + } + ) # return results return results diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 6816fe672..15517e3eb 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -42,9 +42,10 @@ def request(query, params): r = http_post( 'https://accounts.spotify.com/api/token', data={'grant_type': 'client_credentials'}, - headers={'Authorization': 'Basic ' + base64.b64encode( - "{}:{}".format(api_client_id, api_client_secret).encode() - ).decode()} + headers={ + 'Authorization': 'Basic ' + + base64.b64encode("{}:{}".format(api_client_id, api_client_secret).encode()).decode() + }, ) j = loads(r.text) params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} @@ -63,18 +64,12 @@ def response(resp): if result['type'] == 'track': title = result['name'] url = result['external_urls']['spotify'] - content = '{} - {} - {}'.format( - result['artists'][0]['name'], - result['album']['name'], - result['name']) + content = '{} - {} - 
{}'.format(result['artists'][0]['name'], result['album']['name'], result['name']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 246e59b44..512d71e5e 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -26,15 +26,11 @@ api_key = 'unset' base_url = 'https://api.springernature.com/metadata/json?' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing Springer-Nature API key') - args = urlencode({ - 'q' : query, - 's' : nb_per_page * (params['pageno'] - 1), - 'p' : nb_per_page, - 'api_key' : api_key - }) + args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key}) params['url'] = base_url + args logger.debug("query_url --> %s", params['url']) return params @@ -50,21 +46,27 @@ def response(resp): content += "..." published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') - metadata = [record[x] for x in [ - 'publicationName', - 'identifier', - 'contentType', - ] if record.get(x) is not None] + metadata = [ + record[x] + for x in [ + 'publicationName', + 'identifier', + 'contentType', + ] + if record.get(x) is not None + ] metadata = ' / '.join(metadata) if record.get('startingPage') and record.get('endingPage') is not None: metadata += " (%(startingPage)s-%(endingPage)s)" % record - results.append({ - 'title': record['title'], - 'url': record['url'][0]['value'].replace('http://', 'https://', 1), - 'content' : content, - 'publishedDate' : published, - 'metadata' : metadata - }) + results.append( + { + 'title': record['title'], + 'url': record['url'][0]['value'].replace('http://', 'https://', 1), + 'content': content, + 'publishedDate': published, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py index 43a85efbb..6de12f5fe 100644 --- a/searx/engines/sqlite.py +++ b/searx/engines/sqlite.py @@ -47,9 +47,9 @@ def search(query, params): query_params = { 'query': query, - 'wildcard': r'%' + query.replace(' ', r'%') + r'%', + 'wildcard': r'%' + query.replace(' ', r'%') + r'%', 'limit': limit, - 'offset': (params['pageno'] - 1) * limit + 'offset': (params['pageno'] - 1) * limit, } query_to_run = query_str + ' LIMIT :limit OFFSET :offset' @@ -59,7 +59,7 @@ def search(query, params): col_names = [cn[0] for cn in cur.description] for row in cur.fetchall(): - item = dict( zip(col_names, map(str, row)) ) + item = dict(zip(col_names, map(str, row))) item['template'] = result_template logger.debug("append result --> %s", item) results.append(item) diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py index 34cba687c..99615b1a7 100644 --- a/searx/engines/stackexchange.py +++ b/searx/engines/stackexchange.py @@ -23,26 +23,30 @@ paging = True pagesize = 10 api_site = 'stackoverflow' -api_sort= 'activity' +api_sort = 'activity' api_order = 'desc' # https://api.stackexchange.com/docs/advanced-search search_api = 'https://api.stackexchange.com/2.3/search/advanced?' 
+ def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'pagesize' : pagesize, - 'site' : api_site, - 'sort' : api_sort, - 'order': 'desc', - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'pagesize': pagesize, + 'site': api_site, + 'sort': api_sort, + 'order': 'desc', + } + ) params['url'] = search_api + args return params + def response(resp): results = [] @@ -56,10 +60,12 @@ def response(resp): content += ' // is answered' content += " // score: %s" % result['score'] - results.append({ - 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), - 'title': html.unescape(result['title']), - 'content': html.unescape(content), - }) + results.append( + { + 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), + 'title': html.unescape(result['title']), + 'content': html.unescape(content), + } + ) return results diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index e71310be6..65d90debe 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -101,7 +101,7 @@ def response(resp): # check if search result starts with something like: "2 Sep 2014 ... " if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # fix content string content = content[date_pos:] @@ -113,7 +113,7 @@ def response(resp): # check if search result starts with something like: "5 days ago ... " elif re.match(r"^[0-9]+ days? ago \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # calculate datetime published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) @@ -123,15 +123,10 @@ def response(resp): if published_date: # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) + results.append({'url': url, 'title': title, 'content': content, 'publishedDate': published_date}) else: # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results @@ -152,7 +147,7 @@ def _fetch_supported_languages(resp): 'malayam': 'ml', 'norsk': 'nb', 'sinhalese': 'si', - 'sudanese': 'su' + 'sudanese': 'su', } # get the English name of every language known by babel diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 0d62453a9..b01de38c1 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -56,11 +56,7 @@ def response(resp): name_row = rows[i] links = name_row.xpath('./td[@class="desc-top"]/a') - params = { - 'template': 'torrent.html', - 'url': links[-1].attrib.get('href'), - 'title': extract_text(links[-1]) - } + params = {'template': 'torrent.html', 'url': links[-1].attrib.get('href'), 'title': extract_text(links[-1])} # I have not yet seen any torrents without magnet links, but # it's better to be prepared to stumble upon one some day if len(links) == 2: diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index 960d1ee90..a48017c13 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -35,10 +35,12 @@ api_key = '' # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories torznab_categories = [] -def init(engine_settings=None): # pylint: disable=unused-argument + +def 
init(engine_settings=None): # pylint: disable=unused-argument if len(base_url) < 1: raise ValueError('missing torznab base_url') + def request(query, params): search_url = base_url + '?t=search&q={search_query}' @@ -48,13 +50,12 @@ def request(query, params): search_url += '&cat={torznab_categories}' params['url'] = search_url.format( - search_query = quote(query), - api_key = api_key, - torznab_categories = ",".join([str(x) for x in torznab_categories]) + search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories]) ) return params + def response(resp): results = [] @@ -103,8 +104,7 @@ def response(resp): result["publishedDate"] = None try: - result["publishedDate"] = datetime.strptime( - get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') + result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') except (ValueError, TypeError) as e: logger.debug("ignore exception (publishedDate): %s", e) @@ -134,9 +134,7 @@ def get_property(item, property_name): def get_torznab_attr(item, attr_name): element = item.find( './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name), - { - 'torznab': 'http://torznab.com/schemas/2015/feed' - } + {'torznab': 'http://torznab.com/schemas/2015/feed'}, ) if element is not None: diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 8d67ca0bb..62ade49e2 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -28,24 +28,25 @@ def request(query, params): key_form = '&key=' + api_key else: key_form = '' - params['url'] = url.format(from_lang=params['from_lang'][1], - to_lang=params['to_lang'][1], - query=params['query'], - key=key_form) + params['url'] = url.format( + from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form + ) return params def response(resp): results = [] - results.append({ - 'url': web_url.format( - from_lang=resp.search_params['from_lang'][2], - to_lang=resp.search_params['to_lang'][2], - query=resp.search_params['query']), - 'title': '[{0}-{1}] {2}'.format( - resp.search_params['from_lang'][1], - resp.search_params['to_lang'][1], - resp.search_params['query']), - 'content': resp.json()['responseData']['translatedText'] - }) + results.append( + { + 'url': web_url.format( + from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'], + ), + 'title': '[{0}-{1}] {2}'.format( + resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query'] + ), + 'content': resp.json()['responseData']['translatedText'], + } + ) return results diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 1445b4cec..1967fefd2 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -26,23 +26,13 @@ paging = True def clean_url(url): parsed = urlparse(url) - query = [(k, v) for (k, v) - in parse_qsl(parsed.query) if k not in ['ixid', 's']] + query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] - return urlunparse(( - parsed.scheme, - parsed.netloc, - parsed.path, - parsed.params, - urlencode(query), - parsed.fragment - )) + return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment)) def request(query, params): - params['url'] = search_url + urlencode({ - 'query': query, 'page': params['pageno'], 'per_page': page_size - }) + params['url'] = search_url + urlencode({'query': 
query, 'page': params['pageno'], 'per_page': page_size}) logger.debug("query_url --> %s", params['url']) return params @@ -53,13 +43,15 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: - results.append({ - 'template': 'images.html', - 'url': clean_url(result['links']['html']), - 'thumbnail_src': clean_url(result['urls']['thumb']), - 'img_src': clean_url(result['urls']['raw']), - 'title': result.get('alt_description') or 'unknown', - 'content': result.get('description') or '' - }) + results.append( + { + 'template': 'images.html', + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), + 'title': result.get('alt_description') or 'unknown', + 'content': result.get('description') or '', + } + ) return results diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 824579256..52d201eac 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -25,15 +25,16 @@ paging = True base_url = 'https://vimeo.com/' search_url = base_url + '/search/page:{pageno}?{query}' -embedded_url = '<iframe data-src="https://player.vimeo.com/video/{videoid}" ' +\ - 'width="540" height="304" frameborder="0" ' +\ - 'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>' +embedded_url = ( + '<iframe data-src="https://player.vimeo.com/video/{videoid}" ' + + 'width="540" height="304" frameborder="0" ' + + 'webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>' +) # do search-request def request(query, params): - params['url'] = search_url.format(pageno=params['pageno'], - query=urlencode({'q': query})) + params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query})) return params @@ -56,13 +57,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': '', - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': '', + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 59413499c..c8881d299 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS from searx.network import post, get from searx.utils import match_language, searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.wikipedia import ( + _fetch_supported_languages, + supported_languages_url, +) # NOQA # pylint: disable=unused-import # about about = { @@ -112,10 +115,7 @@ replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def get_headers(): # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits - return { - 'Accept': 'application/sparql-results+json', - 'User-Agent': searx_useragent() - } + return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()} def get_label_for_entity(entity_id, language): @@ -211,9 +211,9 @@ def get_results(attribute_result, attributes, language): results.append({'title': 
infobox_title, 'url': url}) # update the infobox_id with the wikipedia URL # first the local wikipedia URL, and as fallback the english wikipedia URL - if attribute_type == WDArticle\ - and ((attribute.language == 'en' and infobox_id_lang is None) - or attribute.language != 'en'): + if attribute_type == WDArticle and ( + (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en' + ): infobox_id_lang = attribute.language infobox_id = url elif attribute_type == WDImageAttribute: @@ -232,13 +232,11 @@ def get_results(attribute_result, attributes, language): osm_zoom = area_to_osm_zoom(area) if area else 19 url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom) if url: - infobox_urls.append({'title': attribute.get_label(language), - 'url': url, - 'entity': attribute.name}) + infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name}) else: - infobox_attributes.append({'label': attribute.get_label(language), - 'value': value, - 'entity': attribute.name}) + infobox_attributes.append( + {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name} + ) if infobox_id: infobox_id = replace_http_by_https(infobox_id) @@ -246,22 +244,19 @@ def get_results(attribute_result, attributes, language): # add the wikidata URL at the end infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']}) - if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\ - len(infobox_content) == 0: - results.append({ - 'url': infobox_urls[0]['url'], - 'title': infobox_title, - 'content': infobox_content - }) + if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0: + results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content}) else: - results.append({ - 'infobox': infobox_title, - 'id': infobox_id, - 'content': infobox_content, - 'img_src': img_src, - 'urls': infobox_urls, - 'attributes': infobox_attributes - }) + results.append( + { + 'infobox': infobox_title, + 'id': infobox_id, + 'content': infobox_content, + 'img_src': img_src, + 'urls': infobox_urls, + 'attributes': infobox_attributes, + } + ) return results @@ -271,13 +266,14 @@ def get_query(query, language): where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes])) wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes])) group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes])) - query = QUERY_TEMPLATE\ - .replace('%QUERY%', sparql_string_escape(query))\ - .replace('%SELECT%', ' '.join(select))\ - .replace('%WHERE%', '\n '.join(where))\ - .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))\ - .replace('%GROUP_BY%', ' '.join(group_by))\ + query = ( + QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query)) + .replace('%SELECT%', ' '.join(select)) + .replace('%WHERE%', '\n '.join(where)) + .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label)) + .replace('%GROUP_BY%', ' '.join(group_by)) .replace('%LANGUAGE%', language) + ) return query, attributes @@ -303,90 +299,98 @@ def get_attributes(language): attributes.append(WDDateAttribute(name)) # Dates - for p in ['P571', # inception date - 'P576', # dissolution date - 'P580', # start date - 'P582', # end date - 'P569', # date of birth - 'P570', # date of death - 'P619', # date of spacecraft launch - 'P620']: # date of spacecraft landing + for p in [ + 'P571', # inception date + 'P576', # 
dissolution date + 'P580', # start date + 'P582', # end date + 'P569', # date of birth + 'P570', # date of death + 'P619', # date of spacecraft launch + 'P620', + ]: # date of spacecraft landing add_date(p) - for p in ['P27', # country of citizenship - 'P495', # country of origin - 'P17', # country - 'P159']: # headquarters location + for p in [ + 'P27', # country of citizenship + 'P495', # country of origin + 'P17', # country + 'P159', + ]: # headquarters location add_label(p) # Places - for p in ['P36', # capital - 'P35', # head of state - 'P6', # head of government - 'P122', # basic form of government - 'P37']: # official language + for p in [ + 'P36', # capital + 'P35', # head of state + 'P6', # head of government + 'P122', # basic form of government + 'P37', + ]: # official language add_label(p) - add_value('P1082') # population + add_value('P1082') # population add_amount('P2046') # area - add_amount('P281') # postal code - add_label('P38') # currency + add_amount('P281') # postal code + add_label('P38') # currency add_amount('P2048') # heigth (building) # Media - for p in ['P400', # platform (videogames, computing) - 'P50', # author - 'P170', # creator - 'P57', # director - 'P175', # performer - 'P178', # developer - 'P162', # producer - 'P176', # manufacturer - 'P58', # screenwriter - 'P272', # production company - 'P264', # record label - 'P123', # publisher - 'P449', # original network - 'P750', # distributed by - 'P86']: # composer + for p in [ + 'P400', # platform (videogames, computing) + 'P50', # author + 'P170', # creator + 'P57', # director + 'P175', # performer + 'P178', # developer + 'P162', # producer + 'P176', # manufacturer + 'P58', # screenwriter + 'P272', # production company + 'P264', # record label + 'P123', # publisher + 'P449', # original network + 'P750', # distributed by + 'P86', + ]: # composer add_label(p) - add_date('P577') # publication date - add_label('P136') # genre (music, film, artistic...) - add_label('P364') # original language - add_value('P212') # ISBN-13 - add_value('P957') # ISBN-10 - add_label('P275') # copyright license - add_label('P277') # programming language - add_value('P348') # version - add_label('P840') # narrative location + add_date('P577') # publication date + add_label('P136') # genre (music, film, artistic...) + add_label('P364') # original language + add_value('P212') # ISBN-13 + add_value('P957') # ISBN-10 + add_label('P275') # copyright license + add_label('P277') # programming language + add_value('P348') # version + add_label('P840') # narrative location # Languages - add_value('P1098') # number of speakers - add_label('P282') # writing system - add_label('P1018') # language regulatory body - add_value('P218') # language code (ISO 639-1) + add_value('P1098') # number of speakers + add_label('P282') # writing system + add_label('P1018') # language regulatory body + add_value('P218') # language code (ISO 639-1) # Other - add_label('P169') # ceo - add_label('P112') # founded by - add_label('P1454') # legal form (company, organization) - add_label('P137') # operator (service, facility, ...) - add_label('P1029') # crew members (tripulation) - add_label('P225') # taxon name - add_value('P274') # chemical formula - add_label('P1346') # winner (sports, contests, ...) - add_value('P1120') # number of deaths - add_value('P498') # currency code (ISO 4217) + add_label('P169') # ceo + add_label('P112') # founded by + add_label('P1454') # legal form (company, organization) + add_label('P137') # operator (service, facility, ...) 
+ add_label('P1029') # crew members (tripulation) + add_label('P225') # taxon name + add_value('P274') # chemical formula + add_label('P1346') # winner (sports, contests, ...) + add_value('P1120') # number of deaths + add_value('P498') # currency code (ISO 4217) # URL - add_url('P856', official=True) # official website + add_url('P856', official=True) # official website attributes.append(WDArticle(language)) # wikipedia (user language) if not language.startswith('en'): attributes.append(WDArticle('en')) # wikipedia (english) - add_url('P1324') # source code repository - add_url('P1581') # blog + add_url('P1324') # source code repository + add_url('P1581') # blog add_url('P434', url_id='musicbrainz_artist') add_url('P435', url_id='musicbrainz_work') add_url('P436', url_id='musicbrainz_release_group') @@ -402,11 +406,11 @@ def get_attributes(language): attributes.append(WDGeoAttribute('P625')) # Image - add_image('P15', priority=1, url_id='wikimedia_image') # route map - add_image('P242', priority=2, url_id='wikimedia_image') # locator map - add_image('P154', priority=3, url_id='wikimedia_image') # logo - add_image('P18', priority=4, url_id='wikimedia_image') # image - add_image('P41', priority=5, url_id='wikimedia_image') # flag + add_image('P15', priority=1, url_id='wikimedia_image') # route map + add_image('P242', priority=2, url_id='wikimedia_image') # locator map + add_image('P154', priority=3, url_id='wikimedia_image') # logo + add_image('P18', priority=4, url_id='wikimedia_image') # image + add_image('P41', priority=5, url_id='wikimedia_image') # flag add_image('P2716', priority=6, url_id='wikimedia_image') # collage add_image('P2910', priority=7, url_id='wikimedia_image') # icon @@ -415,7 +419,7 @@ def get_attributes(language): class WDAttribute: - __slots__ = 'name', + __slots__ = ('name',) def __init__(self, name): self.name = name @@ -443,14 +447,15 @@ class WDAttribute: class WDAmountAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}Unit'.replace('{name}', self.name) def get_where(self): return """ OPTIONAL { ?item p:{name} ?{name}Node . ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} . - OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name) + OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -484,7 +489,9 @@ class WDArticle(WDAttribute): return """OPTIONAL { ?article{language} schema:about ?item ; schema:inLanguage "{language}" ; schema:isPartOf <https://{language}.wikipedia.org/> ; - schema:name ?articleName{language} . }""".replace('{language}', self.language) + schema:name ?articleName{language} . 
}""".replace( + '{language}', self.language + ) def get_group_by(self): return self.get_select() @@ -495,7 +502,6 @@ class WDArticle(WDAttribute): class WDLabelAttribute(WDAttribute): - def get_select(self): return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name) @@ -526,14 +532,13 @@ class WDURLAttribute(WDAttribute): value = value.split(',')[0] url_id = self.url_id if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE): - value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):] + value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :] url_id = 'wikimedia_image' return get_external_url(url_id, value) return value class WDGeoAttribute(WDAttribute): - def get_label(self, language): return "OpenStreetMap" @@ -543,7 +548,9 @@ class WDGeoAttribute(WDAttribute): def get_where(self): return """OPTIONAL { ?item p:{name}/psv:{name} [ wikibase:geoLatitude ?{name}Lat ; - wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name) + wikibase:geoLongitude ?{name}Long ] }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -565,7 +572,7 @@ class WDGeoAttribute(WDAttribute): class WDImageAttribute(WDURLAttribute): - __slots__ = 'priority', + __slots__ = ('priority',) def __init__(self, name, url_id=None, priority=100): super().__init__(name, url_id) @@ -573,7 +580,6 @@ class WDImageAttribute(WDURLAttribute): class WDDateAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) @@ -587,7 +593,9 @@ class WDDateAttribute(WDAttribute): wikibase:timePrecision ?{name}timePrecision ; wikibase:timeTimezone ?{name}timeZone ; wikibase:timeCalendarModel ?{name}timeCalendar ] . } - hint:Prior hint:rangeSafe true;""".replace('{name}', self.name) + hint:Prior hint:rangeSafe true;""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -619,11 +627,12 @@ class WDDateAttribute(WDAttribute): def format_13(self, value, locale): timestamp = isoparse(value) # precision: minute - return get_datetime_format(format, locale=locale) \ - .replace("'", "") \ - .replace('{0}', format_time(timestamp, 'full', tzinfo=None, - locale=locale)) \ + return ( + get_datetime_format(format, locale=locale) + .replace("'", "") + .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale)) .replace('{1}', format_date(timestamp, 'short', locale=locale)) + ) def format_14(self, value, locale): # precision: second. 
@@ -644,7 +653,7 @@ class WDDateAttribute(WDAttribute): '11': ('format_11', 0), # day '12': ('format_13', 0), # hour (not supported by babel, display minute) '13': ('format_13', 0), # minute - '14': ('format_14', 0) # second + '14': ('format_14', 0), # second } def get_str(self, result, language): diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 5e34db9a7..cc806a8de 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -39,8 +39,7 @@ def request(query, params): query = query.title() language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), - language=language) + params['url'] = search_url.format(title=quote(query), language=language) if params['language'].lower() in language_variants.get(language, []): params['headers']['Accept-Language'] = params['language'].lower() @@ -63,8 +62,10 @@ def response(resp): except: pass else: - if api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' \ - and api_result['detail'] == 'title-invalid-characters': + if ( + api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' + and api_result['detail'] == 'title-invalid-characters' + ): return [] raise_for_httperror(resp) @@ -81,11 +82,15 @@ def response(resp): results.append({'url': wikipedia_link, 'title': title}) - results.append({'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 9c84e2809..1c882c582 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -36,8 +36,7 @@ img_alt_xpath = './@alt' # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration'} +image_pods = {'VisualRepresentation', 'Illustration'} # do search-request @@ -50,15 +49,17 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = {'\uf522': '\u2192', # rigth arrow - '\uf7b1': '\u2115', # set of natural numbers - '\uf7b4': '\u211a', # set of rational numbers - '\uf7b5': '\u211d', # set of real numbers - '\uf7bd': '\u2124', # set of integer numbers - '\uf74c': 'd', # differential - '\uf74d': '\u212f', # euler's number - '\uf74e': 'i', # imaginary number - '\uf7d9': '='} # equals sign + pua_chars = { + '\uf522': '\u2192', # rigth arrow + '\uf7b1': '\u2115', # set of natural numbers + '\uf7b4': '\u211a', # set of rational numbers + '\uf7b5': '\u211d', # set of real numbers + '\uf7bd': '\u2124', # set of integer numbers + '\uf74c': 'd', # differential + '\uf74d': '\u212f', # euler's number + '\uf74e': 'i', # imaginary number + '\uf7d9': '=', + } # equals sign for k, v in pua_chars.items(): text = text.replace(k, v) @@ -112,9 +113,12 @@ def response(resp): result_chunks.append({'label': pod_title, 'value': content}) elif image: - result_chunks.append({'label': pod_title, - 'image': {'src': image[0].xpath(img_src_xpath)[0], - 'alt': image[0].xpath(img_alt_xpath)[0]}}) + result_chunks.append( + 
{ + 'label': pod_title, + 'image': {'src': image[0].xpath(img_src_xpath)[0], 'alt': image[0].xpath(img_alt_xpath)[0]}, + } + ) if not result_chunks: return [] @@ -122,13 +126,15 @@ def response(resp): title = "Wolfram|Alpha (%s)" % infobox_title # append infobox - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) # append link to site - results.append({'url': resp.request.headers['Referer'], - 'title': title, - 'content': result_content}) + results.append({'url': resp.request.headers['Referer'], 'title': title, 'content': result_content}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 1f2cfa4e6..bad25602a 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -22,30 +22,29 @@ about = { # search-url url = 'https://www.wolframalpha.com/' -search_url = url + 'input/json.jsp'\ - '?async=false'\ - '&banners=raw'\ - '&debuggingdata=false'\ - '&format=image,plaintext,imagemap,minput,moutput'\ - '&formattimeout=2'\ - '&{query}'\ - '&output=JSON'\ - '&parsetimeout=2'\ - '&proxycode={token}'\ - '&scantimeout=0.5'\ - '&sponsorcategories=true'\ +search_url = ( + url + 'input/json.jsp' + '?async=false' + '&banners=raw' + '&debuggingdata=false' + '&format=image,plaintext,imagemap,minput,moutput' + '&formattimeout=2' + '&{query}' + '&output=JSON' + '&parsetimeout=2' + '&proxycode={token}' + '&scantimeout=0.5' + '&sponsorcategories=true' '&statemethod=deploybutton' +) referer_url = url + 'input/?{query}' -token = {'value': '', - 'last_updated': None} +token = {'value': '', 'last_updated': None} # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration', - 'Symbol'} +image_pods = {'VisualRepresentation', 'Illustration', 'Symbol'} # seems, wolframalpha resets its token in every hour @@ -115,12 +114,20 @@ def response(resp): if not result_chunks: return [] - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) - - results.append({'url': resp.request.headers['Referer'], - 'title': 'Wolfram|Alpha (' + infobox_title + ')', - 'content': result_content}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) + + results.append( + { + 'url': resp.request.headers['Referer'], + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content, + } + ) return results diff --git a/searx/engines/wordnik.py b/searx/engines/wordnik.py index 0c3785cfb..21eaeccc3 100644 --- a/searx/engines/wordnik.py +++ b/searx/engines/wordnik.py @@ -48,7 +48,7 @@ def response(resp): def_abbr = extract_text(def_item.xpath('.//abbr')).strip() def_text = extract_text(def_item).strip() if def_abbr: - def_text = def_text[len(def_abbr):].strip() + def_text = def_text[len(def_abbr) :].strip() src_defs.append((def_abbr, def_text)) definitions.append((src_text, src_defs)) @@ -66,9 +66,11 @@ def response(resp): infobox += f"<li><i>{def_abbr}</i> {def_text}</li>" infobox += "</ul>" - results.append({ - 'infobox': word, - 'content': infobox, 
- }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 96b8d680c..f6b82944d 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -46,12 +46,16 @@ def response(resp): thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src']) # append result - results.append({'url': url, - 'title': title, - 'img_src': thumbnail_src, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': title, + 'img_src': thumbnail_src, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 08677b708..2737bf94a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -56,7 +56,7 @@ Replacements are: """ -lang_all='en' +lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' @@ -110,9 +110,9 @@ requested by the user, the URL paramter is an empty string. The time_range_map = { 'day': 24, - 'week': 24*7, - 'month': 24*30, - 'year': 24*365, + 'week': 24 * 7, + 'month': 24 * 30, + 'year': 24 * 365, } '''Maps time range value from user to ``{time_range_val}`` in :py:obj:`time_range_url`. @@ -129,11 +129,7 @@ time_range_map = { safe_search_support = False '''Engine supports safe-search.''' -safe_search_map = { - 0: '&filter=none', - 1: '&filter=moderate', - 2: '&filter=strict' -} +safe_search_map = {0: '&filter=none', 1: '&filter=moderate', 2: '&filter=strict'} '''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`. .. code:: yaml @@ -146,10 +142,9 @@ safe_search_map = { ''' -def request(query, params): - '''Build request parameters (see :ref:`engine request`). - ''' +def request(query, params): + '''Build request parameters (see :ref:`engine request`).''' lang = lang_all if params['language'] != 'all': lang = params['language'][:2] @@ -167,8 +162,8 @@ def request(query, params): 'query': urlencode({'q': query})[2:], 'lang': lang, 'pageno': (params['pageno'] - 1) * page_size + first_page_num, - 'time_range' : time_range, - 'safe_search' : safe_search, + 'time_range': time_range, + 'safe_search': safe_search, } params['url'] = search_url.format(**fargs) @@ -176,10 +171,9 @@ def request(query, params): return params -def response(resp): - '''Scrap *results* from the response (see :ref:`engine results`). 
- ''' +def response(resp): + '''Scrap *results* from the response (see :ref:`engine results`).''' results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories @@ -200,10 +194,7 @@ def response(resp): # add alternative cached url if available if cached_xpath: - tmp_result['cached_url'] = ( - cached_url - + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) - ) + tmp_result['cached_url'] = cached_url + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) if is_onion: tmp_result['is_onion'] = True @@ -213,31 +204,27 @@ def response(resp): else: if cached_xpath: for url, title, content, cached in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), map(extract_text, eval_xpath_list(dom, content_xpath)), - map(extract_text, eval_xpath_list(dom, cached_xpath)) + map(extract_text, eval_xpath_list(dom, cached_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'cached_url': cached_url + cached, 'is_onion': is_onion - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'cached_url': cached_url + cached, + 'is_onion': is_onion, + } + ) else: for url, title, content in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), - map(extract_text, eval_xpath_list(dom, content_xpath)) + map(extract_text, eval_xpath_list(dom, content_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'is_onion': is_onion - }) + results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion}) if suggestion_xpath: for suggestion in eval_xpath(dom, suggestion_xpath): diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index fbd99c47b..12e7305db 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -30,18 +30,16 @@ http_digest_auth_pass = "" # search-url base_url = 'http://localhost:8090' -search_url = '/yacysearch.json?{query}'\ - '&startRecord={offset}'\ - '&maximumRecords={limit}'\ - '&contentdom={search_type}'\ - '&resource=global' +search_url = ( + '/yacysearch.json?{query}' + '&startRecord={offset}' + '&maximumRecords={limit}' + '&contentdom={search_type}' + '&resource=global' +) # yacy specific type-definitions -search_types = {'general': 'text', - 'images': 'image', - 'files': 'app', - 'music': 'audio', - 'videos': 'video'} +search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'} # do search-request @@ -49,11 +47,9 @@ def request(query, params): offset = (params['pageno'] - 1) * number_of_results search_type = search_types.get(params.get('category'), '0') - params['url'] = base_url +\ - search_url.format(query=urlencode({'query': query}), - offset=offset, - limit=number_of_results, - search_type=search_type) + params['url'] = base_url + search_url.format( + query=urlencode({'query': query}), offset=offset, limit=number_of_results, search_type=search_type + ) if http_digest_auth_user and http_digest_auth_pass: params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) @@ -93,21 +89,29 @@ def response(resp): continue # append result - results.append({'url': result_url, - 'title': result['title'], - 'content': '', - 'img_src': result['image'], - 'template': 'images.html'}) + 
results.append( + { + 'url': result_url, + 'title': result['title'], + 'content': '', + 'img_src': result['image'], + 'template': 'images.html', + } + ) # parse general results else: publishedDate = parser.parse(result['pubDate']) # append result - results.append({'url': result['link'], - 'title': result['title'], - 'content': html_to_text(result['description']), - 'publishedDate': publishedDate}) + results.append( + { + 'url': result['link'], + 'title': result['title'], + 'content': html_to_text(result['description']), + 'publishedDate': publishedDate, + } + ) # TODO parse video, audio and file results diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index bd6e6721c..08bde6665 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -50,59 +50,59 @@ language_aliases = { } lang2domain = { - 'zh_chs' : 'hk.search.yahoo.com', - 'zh_cht' : 'tw.search.yahoo.com', - 'en' : 'search.yahoo.com', - - 'bg' : 'search.yahoo.com', - 'cs' : 'search.yahoo.com', - 'da' : 'search.yahoo.com', - 'el' : 'search.yahoo.com', - 'et' : 'search.yahoo.com', - 'he' : 'search.yahoo.com', - 'hr' : 'search.yahoo.com', - 'ja' : 'search.yahoo.com', - 'ko' : 'search.yahoo.com', - 'sk' : 'search.yahoo.com', - 'sl' : 'search.yahoo.com', - + 'zh_chs': 'hk.search.yahoo.com', + 'zh_cht': 'tw.search.yahoo.com', + 'en': 'search.yahoo.com', + 'bg': 'search.yahoo.com', + 'cs': 'search.yahoo.com', + 'da': 'search.yahoo.com', + 'el': 'search.yahoo.com', + 'et': 'search.yahoo.com', + 'he': 'search.yahoo.com', + 'hr': 'search.yahoo.com', + 'ja': 'search.yahoo.com', + 'ko': 'search.yahoo.com', + 'sk': 'search.yahoo.com', + 'sl': 'search.yahoo.com', } """Map language to domain""" + def _get_language(params): lang = language_aliases.get(params['language']) if lang is None: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) lang = lang.split('-')[0] - logger.debug("params['language']: %s --> %s" , params['language'], lang) + logger.debug("params['language']: %s --> %s", params['language'], lang) return lang + def request(query, params): """build request""" offset = (params['pageno'] - 1) * 7 + 1 - lang = _get_language(params) - age, btf = time_range_dict.get( - params['time_range'], ('', '')) - - args = urlencode({ - 'p' : query, - 'ei' : 'UTF-8', - 'fl' : 1, - 'vl' : 'lang_' + lang, - 'btf' : btf, - 'fr2' : 'time', - 'age' : age, - 'b' : offset, - 'xargs' :0 - }) + lang = _get_language(params) + age, btf = time_range_dict.get(params['time_range'], ('', '')) + + args = urlencode( + { + 'p': query, + 'ei': 'UTF-8', + 'fl': 1, + 'vl': 'lang_' + lang, + 'btf': btf, + 'fr2': 'time', + 'age': age, + 'b': offset, + 'xargs': 0, + } + ) domain = lang2domain.get(lang, '%s.search.yahoo.com' % lang) params['url'] = 'https://%s/search?%s' % (domain, args) return params + def parse_url(url_string): """remove yahoo-specific tracking-url""" @@ -121,6 +121,7 @@ def parse_url(url_string): end = min(endpositions) return unquote(url_string[start:end]) + def response(resp): """parse response""" @@ -140,18 +141,12 @@ def response(resp): offset = len(extract_text(title.xpath('span'))) title = extract_text(title)[offset:] - content = eval_xpath_getindex( - result, './/div[contains(@class, "compText")]', 0, default='' - ) + content = eval_xpath_getindex(result, './/div[contains(@class, "compText")]', 0, default='') if content: content = extract_text(content) # append result - results.append({ - 'url': url, - 'title': 
title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a'): # append suggestion @@ -167,6 +162,6 @@ def _fetch_supported_languages(resp): offset = len('lang_') for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - supported_languages.append( val[offset:] ) + supported_languages.append(val[offset:]) return supported_languages diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index ec07cd408..06f090f74 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -39,36 +39,31 @@ paging = True categories = ['news'] # search-url -search_url = ( - 'https://news.search.yahoo.com/search' - '?{query}&b={offset}' - ) +search_url = 'https://news.search.yahoo.com/search' '?{query}&b={offset}' AGO_RE = re.compile(r'([0-9]+)\s*(year|month|week|day|minute|hour)') AGO_TIMEDELTA = { - 'minute': timedelta(minutes=1), - 'hour': timedelta(hours=1), - 'day': timedelta(days=1), - 'week': timedelta(days=7), - 'month': timedelta(days=30), - 'year': timedelta(days=365), + 'minute': timedelta(minutes=1), + 'hour': timedelta(hours=1), + 'day': timedelta(days=1), + 'week': timedelta(days=7), + 'month': timedelta(days=30), + 'year': timedelta(days=365), } + def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 - params['url'] = search_url.format( - offset = offset, - query = urlencode({'p': query}) - ) + params['url'] = search_url.format(offset=offset, query=urlencode({'p': query})) logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] dom = html.fromstring(resp.text) - # parse results for result in eval_xpath_list(dom, '//ol[contains(@class,"searchCenterMiddle")]//li'): @@ -80,12 +75,7 @@ def response(resp): content = extract_text(result.xpath('.//p')) img_src = eval_xpath_getindex(result, './/img/@data-src', 0, None) - item = { - 'url': url, - 'title': title, - 'content': content, - 'img_src' : img_src - } + item = {'url': url, 'title': title, 'content': content, 'img_src': img_src} pub_date = extract_text(result.xpath('.//span[contains(@class,"s-time")]')) ago = AGO_RE.search(pub_date) diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index ed27db07b..52db45960 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -27,17 +27,18 @@ api_key = None base_url = 'https://www.googleapis.com/youtube/v3/search' search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' -embedded_url = '<iframe width="540" height="304" ' +\ - 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' +\ - 'frameborder="0" allowfullscreen></iframe>' +embedded_url = ( + '<iframe width="540" height="304" ' + + 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' + + 'frameborder="0" allowfullscreen></iframe>' +) base_youtube_url = 'https://www.youtube.com/watch?v=' # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - api_key=api_key) + params['url'] = search_url.format(query=urlencode({'q': query}), api_key=api_key) # add language tag if specified if params['language'] != 'all': @@ -79,13 +80,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': 
thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 68b75bc72..239830cc7 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -30,14 +30,13 @@ search_url = base_url + '?search_query={query}&page={page}' time_range_url = '&sp=EgII{time_range}%253D%253D' # the key seems to be constant next_page_url = 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' -time_range_dict = {'day': 'Ag', - 'week': 'Aw', - 'month': 'BA', - 'year': 'BQ'} +time_range_dict = {'day': 'Ag', 'week': 'Aw', 'month': 'BA', 'year': 'BQ'} -embedded_url = '<iframe width="540" height="304" ' +\ - 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' +\ - 'frameborder="0" allowfullscreen></iframe>' +embedded_url = ( + '<iframe width="540" height="304" ' + + 'data-src="https://www.youtube-nocookie.com/embed/{videoid}" ' + + 'frameborder="0" allowfullscreen></iframe>' +) base_youtube_url = 'https://www.youtube.com/watch?v=' @@ -51,10 +50,12 @@ def request(query, params): else: params['url'] = next_page_url params['method'] = 'POST' - params['data'] = dumps({ - 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, - 'continuation': params['engine_data']['next_page_token'], - }) + params['data'] = dumps( + { + 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, + 'continuation': params['engine_data']['next_page_token'], + } + ) params['headers']['Content-Type'] = 'application/json' params['headers']['Cookie'] = "CONSENT=YES+cb.%s-17-p0.en+F+941;" % datetime.now().strftime("%Y%m%d") @@ -71,34 +72,42 @@ def response(resp): def parse_next_page_response(response_text): results = [] result_json = loads(response_text) - for section in (result_json['onResponseReceivedCommands'][0] - .get('appendContinuationItemsAction')['continuationItems'][0] - .get('itemSectionRenderer')['contents']): + for section in ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][0] + .get('itemSectionRenderer')['contents'] + ): if 'videoRenderer' not in section: continue section = section['videoRenderer'] content = "-" if 'descriptionSnippet' in section: content = ' '.join(x['text'] for x in section['descriptionSnippet']['runs']) - results.append({ - 'url': base_youtube_url + section['videoId'], - 'title': ' '.join(x['text'] for x in section['title']['runs']), - 'content': content, - 'author': section['ownerText']['runs'][0]['text'], - 'length': section['lengthText']['simpleText'], - 'template': 'videos.html', - 'embedded': embedded_url.format(videoid=section['videoId']), - 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], - }) + results.append( + { + 'url': base_youtube_url + section['videoId'], + 'title': ' '.join(x['text'] for x in section['title']['runs']), + 'content': content, + 'author': section['ownerText']['runs'][0]['text'], + 'length': section['lengthText']['simpleText'], + 'template': 'videos.html', + 'embedded': embedded_url.format(videoid=section['videoId']), + 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], + } + ) try: - token = result_json['onResponseReceivedCommands'][0]\ - .get('appendContinuationItemsAction')['continuationItems'][1]\ - 
.get('continuationItemRenderer')['continuationEndpoint']\ + token = ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][1] + .get('continuationItemRenderer')['continuationEndpoint'] .get('continuationCommand')['token'] - results.append({ - "engine_data": token, - "key": "next_page_token", - }) + ) + results.append( + { + "engine_data": token, + "key": "next_page_token", + } + ) except: pass @@ -107,26 +116,32 @@ def parse_next_page_response(response_text): def parse_first_page_response(response_text): results = [] - results_data = response_text[response_text.find('ytInitialData'):] - results_data = results_data[results_data.find('{'):results_data.find(';</script>')] + results_data = response_text[response_text.find('ytInitialData') :] + results_data = results_data[results_data.find('{') : results_data.find(';</script>')] results_json = loads(results_data) if results_data else {} - sections = results_json.get('contents', {})\ - .get('twoColumnSearchResultsRenderer', {})\ - .get('primaryContents', {})\ - .get('sectionListRenderer', {})\ - .get('contents', []) + sections = ( + results_json.get('contents', {}) + .get('twoColumnSearchResultsRenderer', {}) + .get('primaryContents', {}) + .get('sectionListRenderer', {}) + .get('contents', []) + ) for section in sections: if "continuationItemRenderer" in section: - next_page_token = section["continuationItemRenderer"]\ - .get("continuationEndpoint", {})\ - .get("continuationCommand", {})\ + next_page_token = ( + section["continuationItemRenderer"] + .get("continuationEndpoint", {}) + .get("continuationCommand", {}) .get("token", "") + ) if next_page_token: - results.append({ - "engine_data": next_page_token, - "key": "next_page_token", - }) + results.append( + { + "engine_data": next_page_token, + "key": "next_page_token", + } + ) for video_container in section.get('itemSectionRenderer', {}).get('contents', []): video = video_container.get('videoRenderer', {}) videoid = video.get('videoId') @@ -140,14 +155,18 @@ def parse_first_page_response(response_text): length = get_text_from_json(video.get('lengthText', {})) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index 180e9e355..81d93ac84 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -31,25 +31,23 @@ categories = ['files'] paging = True base_url = '' + def init(engine_settings=None): - global base_url # pylint: disable=global-statement + global base_url # pylint: disable=global-statement if "base_url" not in engine_settings: resp = http_get('https://z-lib.org', timeout=5.0) if resp.ok: dom = html.fromstring(resp.text) - base_url = "https:" + extract_text(eval_xpath(dom, - './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href' - )) + base_url = "https:" + extract_text( + eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href') + ) logger.debug("using base_url: %s" % base_url) def request(query, params): search_url = base_url + '/s/{search_query}/?page={pageno}' - params['url'] = search_url.format( - 
search_query=quote(query), - pageno=params['pageno'] - ) + params['url'] = search_url.format(search_query=quote(query), pageno=params['pageno']) return params @@ -60,36 +58,34 @@ def response(resp): for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): result = {} - result["url"] = base_url + \ - item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] + result["url"] = base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] result["title"] = extract_text(eval_xpath(item, './/*[@itemprop="name"]')) - year = extract_text(eval_xpath( - item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]')) + year = extract_text( + eval_xpath(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') + ) if year: year = '(%s) ' % year - result["content"] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ + result[ + "content" + ] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ Book rating: {book_rating}, book quality: {book_quality}".format( - year = year, - authors = extract_text(eval_xpath(item, './/div[@class="authors"]')), - publisher = extract_text(eval_xpath(item, './/div[@title="Publisher"]')), - file_type = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]')), - language = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]')), - book_rating = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-interest-score")]')), - book_quality = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-quality-score")]')), - ) + year=year, + authors=extract_text(eval_xpath(item, './/div[@class="authors"]')), + publisher=extract_text(eval_xpath(item, './/div[@title="Publisher"]')), + file_type=extract_text( + eval_xpath(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]') + ), + language=extract_text( + eval_xpath( + item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]' + ) + ), + book_rating=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-interest-score")]')), + book_quality=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-quality-score")]')), + ) result["img_src"] = extract_text(eval_xpath(item, './/img[contains(@class, "cover")]/@data-src')) diff --git a/searx/exceptions.py b/searx/exceptions.py index 67a282da2..1b106d40c 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -21,7 +21,6 @@ class SearxException(Exception): class SearxParameterException(SearxException): - def __init__(self, name, value): if value == '' or value is None: message = 'Empty ' + name + ' parameter' diff --git a/searx/external_urls.py b/searx/external_urls.py index 11c6a32d9..2657dba4b 100644 --- a/searx/external_urls.py +++ b/searx/external_urls.py @@ -8,7 +8,7 @@ IMDB_PREFIX_TO_URL_ID = { 'mn': 'imdb_name', 'ch': 'imdb_character', 'co': 'imdb_company', - 'ev': 'imdb_event' + 'ev': 'imdb_event', } HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/' @@ -20,9 +20,9 @@ def get_imdb_url_id(imdb_item_id): def get_wikimedia_image_id(url): if url.startswith(HTTP_WIKIMEDIA_IMAGE): - return url[len(HTTP_WIKIMEDIA_IMAGE):] + return url[len(HTTP_WIKIMEDIA_IMAGE) :] if url.startswith('File:'): - return url[len('File:'):] + return 
url[len('File:') :] return url @@ -52,10 +52,12 @@ def get_external_url(url_id, item_id, alternative="default"): def get_earth_coordinates_url(latitude, longitude, osm_zoom, alternative='default'): - url = get_external_url('map', None, alternative)\ - .replace('${latitude}', str(latitude))\ - .replace('${longitude}', str(longitude))\ + url = ( + get_external_url('map', None, alternative) + .replace('${latitude}', str(latitude)) + .replace('${longitude}', str(longitude)) .replace('${zoom}', str(osm_zoom)) + ) return url diff --git a/searx/flaskfix.py b/searx/flaskfix.py index 47aabfa53..326c4b981 100644 --- a/searx/flaskfix.py +++ b/searx/flaskfix.py @@ -29,6 +29,7 @@ class ReverseProxyPathFix: :param wsgi_app: the WSGI application ''' + # pylint: disable=too-few-public-methods def __init__(self, wsgi_app): @@ -58,7 +59,7 @@ class ReverseProxyPathFix: environ['SCRIPT_NAME'] = script_name path_info = environ['PATH_INFO'] if path_info.startswith(script_name): - environ['PATH_INFO'] = path_info[len(script_name):] + environ['PATH_INFO'] = path_info[len(script_name) :] scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') if scheme: diff --git a/searx/languages.py b/searx/languages.py index c44eb0b9e..1f157e517 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # list of language codes # this file is generated automatically by utils/fetch_languages.py -language_codes = \ -( ('af-ZA', 'Afrikaans', '', 'Afrikaans'), +language_codes = ( + ('af-ZA', 'Afrikaans', '', 'Afrikaans'), ('ar-EG', 'العربية', '', 'Arabic'), ('be-BY', 'Беларуская', '', 'Belarusian'), ('bg-BG', 'Български', '', 'Bulgarian'), @@ -74,4 +74,5 @@ language_codes = \ ('zh', '中文', '', 'Chinese'), ('zh-CN', '中文', '中国', 'Chinese'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'), - ('zh-TW', '中文', '台灣', 'Chinese'))
\ No newline at end of file + ('zh-TW', '中文', '台灣', 'Chinese'), +) diff --git a/searx/locales.py b/searx/locales.py index b791f35f3..62f64204f 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -51,11 +51,10 @@ def _get_locale_name(locale, locale_name): def initialize_locales(directory): - """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. - """ + """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.""" for dirname in sorted(os.listdir(directory)): # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations - if not os.path.isdir( os.path.join(directory, dirname, 'LC_MESSAGES') ): + if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')): continue locale_name = dirname.replace('_', '-') info = LOCALE_NAMES.get(locale_name) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 995f182af..37f0ba121 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -12,11 +12,19 @@ from searx.engines import engines from .models import HistogramStorage, CounterStorage from .error_recorder import count_error, count_exception, errors_per_engines -__all__ = ["initialize", - "get_engines_stats", "get_engine_errors", - "histogram", "histogram_observe", "histogram_observe_time", - "counter", "counter_inc", "counter_add", - "count_error", "count_exception"] +__all__ = [ + "initialize", + "get_engines_stats", + "get_engine_errors", + "histogram", + "histogram_observe", + "histogram_observe_time", + "counter", + "counter_inc", + "counter_add", + "count_error", + "count_exception", +] ENDPOINTS = {'search'} @@ -72,7 +80,7 @@ def initialize(engine_names=None): # max_timeout = max of all the engine.timeout max_timeout = 2 - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: if engine_name in engines: max_timeout = max(max_timeout, engines[engine_name].timeout) @@ -81,7 +89,7 @@ def initialize(engine_names=None): histogram_size = int(1.5 * max_timeout / histogram_width) # engines - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: # search count counter_storage.configure('engine', engine_name, 'search', 'count', 'sent') counter_storage.configure('engine', engine_name, 'search', 'count', 'successful') @@ -112,17 +120,19 @@ def get_engine_errors(engline_name_list): r = [] for context, count in sorted_context_count_list: percentage = round(20 * count / sent_search_count) * 5 - r.append({ - 'filename': context.filename, - 'function': context.function, - 'line_no': context.line_no, - 'code': context.code, - 'exception_classname': context.exception_classname, - 'log_message': context.log_message, - 'log_parameters': context.log_parameters, - 'secondary': context.secondary, - 'percentage': percentage, - }) + r.append( + { + 'filename': context.filename, + 'function': context.function, + 'line_no': context.line_no, + 'code': context.code, + 'exception_classname': context.exception_classname, + 'log_message': context.log_message, + 'log_parameters': context.log_parameters, + 'secondary': context.secondary, + 'percentage': percentage, + } + ) result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage']) return result diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 37594e5e8..76d27f64f 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -3,8 +3,12 @@ import inspect from json import JSONDecodeError from urllib.parse import urlparse from 
httpx import HTTPError, HTTPStatusError -from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException, - SearxEngineAccessDeniedException) +from searx.exceptions import ( + SearxXPathSyntaxException, + SearxEngineXPathException, + SearxEngineAPIException, + SearxEngineAccessDeniedException, +) from searx import searx_parent_dir from searx.engines import engines @@ -14,8 +18,16 @@ errors_per_engines = {} class ErrorContext: - __slots__ = ('filename', 'function', 'line_no', 'code', 'exception_classname', - 'log_message', 'log_parameters', 'secondary') + __slots__ = ( + 'filename', + 'function', + 'line_no', + 'code', + 'exception_classname', + 'log_message', + 'log_parameters', + 'secondary', + ) def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary): self.filename = filename @@ -30,19 +42,41 @@ class ErrorContext: def __eq__(self, o) -> bool: if not isinstance(o, ErrorContext): return False - return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\ - and self.code == o.code and self.exception_classname == o.exception_classname\ - and self.log_message == o.log_message and self.log_parameters == o.log_parameters \ + return ( + self.filename == o.filename + and self.function == o.function + and self.line_no == o.line_no + and self.code == o.code + and self.exception_classname == o.exception_classname + and self.log_message == o.log_message + and self.log_parameters == o.log_parameters and self.secondary == o.secondary + ) def __hash__(self): - return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary)) + return hash( + ( + self.filename, + self.function, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) + ) def __repr__(self): - return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".\ - format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary) + return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format( + self.filename, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) def add_error_context(engine_name: str, error_context: ErrorContext) -> None: @@ -68,8 +102,9 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]: return urlparse(url).netloc -def get_request_exception_messages(exc: HTTPError)\ - -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: +def get_request_exception_messages( + exc: HTTPError, +) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: url = None status_code = None reason = None @@ -90,11 +125,11 @@ def get_request_exception_messages(exc: HTTPError)\ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, JSONDecodeError): - return (exc.msg, ) + return (exc.msg,) if isinstance(exc, TypeError): - return (str(exc), ) + return (str(exc),) if isinstance(exc, ValueError) and 'lxml' in filename: - return (str(exc), ) + return (str(exc),) if isinstance(exc, HTTPError): return get_request_exception_messages(exc) if isinstance(exc, SearxXPathSyntaxException): @@ -102,9 +137,9 @@ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, SearxEngineXPathException): return (exc.xpath_str, exc.message) if isinstance(exc, 
SearxEngineAPIException): - return (str(exc.args[0]), ) + return (str(exc.args[0]),) if isinstance(exc, SearxEngineAccessDeniedException): - return (exc.message, ) + return (exc.message,) return () @@ -121,7 +156,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame searx_frame = get_trace(framerecords) filename = searx_frame.filename if filename.startswith(searx_parent_dir): - filename = filename[len(searx_parent_dir) + 1:] + filename = filename[len(searx_parent_dir) + 1 :] function = searx_frame.function line_no = searx_frame.lineno code = searx_frame.code_context[0].strip() @@ -140,8 +175,9 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) - del framerecords -def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, - secondary: bool = False) -> None: +def count_error( + engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False +) -> None: framerecords = list(reversed(inspect.stack()[1:])) try: error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 8936a51e3..d42569b7f 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -58,7 +58,7 @@ class Histogram: @property def quartile_percentage(self): - ''' Quartile in percentage ''' + '''Quartile in percentage''' with self._lock: if self._count > 0: return [int(q * 100 / self._count) for q in self._quartiles] diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 82959e355..7d02a0014 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -35,7 +35,7 @@ except ImportError: self._count.release() def get(self): - if not self._count.acquire(True): #pylint: disable=consider-using-with + if not self._count.acquire(True): # pylint: disable=consider-using-with raise Empty return self._queue.popleft() @@ -43,6 +43,7 @@ except ImportError: THREADLOCAL = threading.local() """Thread-local data is data for thread specific values.""" + def reset_time_for_thread(): THREADLOCAL.total_time = 0 @@ -187,10 +188,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): def _stream_generator(method, url, **kwargs): queue = SimpleQueue() network = get_context_network() - future = asyncio.run_coroutine_threadsafe( - stream_chunk_to_queue(network, queue, method, url, **kwargs), - get_loop() - ) + future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop()) # yield chunks obj_or_exception = queue.get() @@ -203,10 +201,7 @@ def _stream_generator(method, url, **kwargs): def _close_response_method(self): - asyncio.run_coroutine_threadsafe( - self.aclose(), - get_loop() - ) + asyncio.run_coroutine_threadsafe(self.aclose(), get_loop()) # reach the end of _self.generator ( _stream_generator ) to an avoid memory leak. 
# it makes sure that : # * the httpx response is closed (see the stream_chunk_to_queue function) diff --git a/searx/network/client.py b/searx/network/client.py index a6cec352d..cd1e41460 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -10,12 +10,7 @@ import anyio import httpcore import httpx from httpx_socks import AsyncProxyTransport -from python_socks import ( - parse_proxy_url, - ProxyConnectionError, - ProxyTimeoutError, - ProxyError -) +from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError from searx import logger @@ -41,9 +36,7 @@ TRANSPORT_KWARGS = { # pylint: disable=protected-access -async def close_connections_for_url( - connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL -): +async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): origin = httpcore._utils.url_to_origin(url) logger.debug('Drop connections for %r', origin) @@ -54,6 +47,8 @@ async def close_connections_for_url( await connection.aclose() except httpx.NetworkError as e: logger.warning('Error closing an existing connection', exc_info=e) + + # pylint: enable=protected-access @@ -67,9 +62,7 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') @@ -83,9 +76,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): Note: AsyncProxyTransport inherit from AsyncConnectionPool """ - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -116,9 +107,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): """Fix httpx.AsyncHTTPTransport""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -152,14 +141,17 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit rdns = False socks5h = 'socks5h://' if proxy_url.startswith(socks5h): - proxy_url = 'socks5://' + proxy_url[len(socks5h):] + proxy_url = 'socks5://' + proxy_url[len(socks5h) :] rdns = True proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify return AsyncProxyTransportFixed( - proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, - username=proxy_username, password=proxy_password, + proxy_type=proxy_type, + proxy_host=proxy_host, + proxy_port=proxy_port, + username=proxy_username, + password=proxy_password, rdns=rdns, loop=get_loop(), verify=verify, @@ -169,7 +161,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit max_keepalive_connections=limit.max_keepalive_connections, keepalive_expiry=limit.keepalive_expiry, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) @@ -183,36 +175,40 @@ def get_transport(verify, http2, 
local_address, proxy_url, limit, retries): proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, limits=limit, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) def new_client( - # pylint: disable=too-many-arguments - enable_http, verify, enable_http2, - max_connections, max_keepalive_connections, keepalive_expiry, - proxies, local_address, retries, max_redirects, hook_log_response ): + # pylint: disable=too-many-arguments + enable_http, + verify, + enable_http2, + max_connections, + max_keepalive_connections, + keepalive_expiry, + proxies, + local_address, + retries, + max_redirects, + hook_log_response, +): limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, - keepalive_expiry=keepalive_expiry + keepalive_expiry=keepalive_expiry, ) # See https://www.python-httpx.org/advanced/#routing mounts = {} for pattern, proxy_url in proxies.items(): if not enable_http and pattern.startswith('http://'): continue - if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') - or proxy_url.startswith('socks5h://') - ): + if proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://'): mounts[pattern] = get_transport_for_socks_proxy( verify, enable_http2, local_address, proxy_url, limit, retries ) else: - mounts[pattern] = get_transport( - verify, enable_http2, local_address, proxy_url, limit, retries - ) + mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries) if not enable_http: mounts['http://'] = AsyncHTTPTransportNoHttp() @@ -221,7 +217,7 @@ def new_client( event_hooks = None if hook_log_response: - event_hooks = {'response': [ hook_log_response ]} + event_hooks = {'response': [hook_log_response]} return httpx.AsyncClient( transport=transport, diff --git a/searx/network/network.py b/searx/network/network.py index 613b9ff27..9e14e14bd 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -31,39 +31,49 @@ PROXY_PATTERN_MAPPING = { 'socks5h:': 'socks5h://', } -ADDRESS_MAPPING = { - 'ipv4': '0.0.0.0', - 'ipv6': '::' -} +ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'} class Network: __slots__ = ( - 'enable_http', 'verify', 'enable_http2', - 'max_connections', 'max_keepalive_connections', 'keepalive_expiry', - 'local_addresses', 'proxies', 'using_tor_proxy', 'max_redirects', 'retries', 'retry_on_http_error', - '_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger' + 'enable_http', + 'verify', + 'enable_http2', + 'max_connections', + 'max_keepalive_connections', + 'keepalive_expiry', + 'local_addresses', + 'proxies', + 'using_tor_proxy', + 'max_redirects', + 'retries', + 'retry_on_http_error', + '_local_addresses_cycle', + '_proxies_cycle', + '_clients', + '_logger', ) _TOR_CHECK_RESULT = {} def __init__( - # pylint: disable=too-many-arguments - self, - enable_http=True, - verify=True, - enable_http2=False, - max_connections=None, - max_keepalive_connections=None, - keepalive_expiry=None, - proxies=None, - using_tor_proxy=False, - local_addresses=None, - retries=0, - retry_on_http_error=None, - max_redirects=30, - logger_name=None): + # pylint: disable=too-many-arguments + self, + enable_http=True, + verify=True, + enable_http2=False, + max_connections=None, + max_keepalive_connections=None, + keepalive_expiry=None, + proxies=None, + using_tor_proxy=False, + local_addresses=None, + retries=0, + retry_on_http_error=None, + max_redirects=30, + logger_name=None, + ): self.enable_http = enable_http 
self.verify = verify @@ -144,9 +154,7 @@ class Network: response_line = f"{response.http_version} {status}" content_type = response.headers.get("Content-Type") content_type = f' ({content_type})' if content_type else '' - self._logger.debug( - f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}' - ) + self._logger.debug(f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}') @staticmethod async def check_tor_proxy(client: httpx.AsyncClient, proxies) -> bool: @@ -187,7 +195,7 @@ class Network: local_address, 0, max_redirects, - hook_log_response + hook_log_response, ) if self.using_tor_proxy and not await self.check_tor_proxy(client, proxies): await client.aclose() @@ -201,6 +209,7 @@ class Network: await client.aclose() except httpx.HTTPError: pass + await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod @@ -214,7 +223,8 @@ class Network: def is_valid_respones(self, response): # pylint: disable=too-many-boolean-expressions - if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599) + if ( + (self.retry_on_http_error is True and 400 <= response.status_code <= 599) or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error) ): @@ -269,6 +279,7 @@ def check_network_configuration(): network._logger.exception('Error') # pylint: disable=protected-access exception_count += 1 return exception_count + future = asyncio.run_coroutine_threadsafe(check(), get_loop()) exception_count = future.result() if exception_count > 0: @@ -279,6 +290,7 @@ def initialize(settings_engines=None, settings_outgoing=None): # pylint: disable=import-outside-toplevel) from searx.engines import engines from searx import settings + # pylint: enable=import-outside-toplevel) settings_engines = settings_engines or settings['engines'] diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index a2f554614..414074977 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -10,13 +10,14 @@ from searx.exceptions import ( SearxEngineAccessDeniedException, ) + def is_cloudflare_challenge(resp): if resp.status_code in [429, 503]: - if (('__cf_chl_jschl_tk__=' in resp.text) - or ('/cdn-cgi/challenge-platform/' in resp.text - and 'orchestrate/jsch/v1' in resp.text - and 'window._cf_chl_enter(' in resp.text - )): + if ('__cf_chl_jschl_tk__=' in resp.text) or ( + '/cdn-cgi/challenge-platform/' in resp.text + and 'orchestrate/jsch/v1' in resp.text + and 'window._cf_chl_enter(' in resp.text + ): return True if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text: return True @@ -32,21 +33,14 @@ def raise_for_cloudflare_captcha(resp): if is_cloudflare_challenge(resp): # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- # suspend for 2 weeks - raise SearxEngineCaptchaException( - message='Cloudflare CAPTCHA', - suspended_time=3600 * 24 * 15 - ) + raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15) if is_cloudflare_firewall(resp): - raise SearxEngineAccessDeniedException( - message='Cloudflare Firewall', suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24) def raise_for_recaptcha(resp): - if (resp.status_code 
== 503 - and '"https://www.google.com/recaptcha/' in resp.text - ): + if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text: raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7) @@ -71,8 +65,7 @@ def raise_for_httperror(resp): raise_for_captcha(resp) if resp.status_code in (402, 403): raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), - suspended_time=3600 * 24 + message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 ) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 4c824da28..7815c2099 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -51,11 +51,7 @@ def sync_resource(base_path, resource_path, name, target_dir, plugin_dir): dep_stat = stat(dep_path) utime(resource_path, ns=(dep_stat.st_atime_ns, dep_stat.st_mtime_ns)) except IOError: - logger.critical( - "failed to copy plugin resource {0} for plugin {1}".format( - file_name, name - ) - ) + logger.critical("failed to copy plugin resource {0} for plugin {1}".format(file_name, name)) sys.exit(3) # returning with the web path of the resource @@ -66,36 +62,28 @@ def prepare_package_resources(plugin, plugin_module_name): plugin_base_path = dirname(abspath(plugin.__file__)) plugin_dir = plugin_module_name - target_dir = join( - settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir - ) + target_dir = join(settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir) try: makedirs(target_dir, exist_ok=True) except IOError: - logger.critical( - "failed to create resource directory {0} for plugin {1}".format( - target_dir, plugin_module_name - ) - ) + logger.critical("failed to create resource directory {0} for plugin {1}".format(target_dir, plugin_module_name)) sys.exit(3) resources = [] if hasattr(plugin, "js_dependencies"): resources.extend(map(basename, plugin.js_dependencies)) - plugin.js_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.js_dependencies - ]) + plugin.js_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.js_dependencies + ] if hasattr(plugin, "css_dependencies"): resources.extend(map(basename, plugin.css_dependencies)) - plugin.css_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.css_dependencies - ]) + plugin.css_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.css_dependencies + ] for f in listdir(target_dir): if basename(f) not in resources: @@ -104,9 +92,7 @@ def prepare_package_resources(plugin, plugin_module_name): remove(resource_path) except IOError: logger.critical( - "failed to remove unused resource file {0} for plugin {1}".format( - resource_path, plugin_module_name - ) + "failed to remove unused resource file {0} for plugin {1}".format(resource_path, plugin_module_name) ) sys.exit(3) @@ -137,9 +123,7 @@ def load_plugin(plugin_module_name, external): for plugin_attr, plugin_attr_type in required_attrs: if not hasattr(plugin, plugin_attr): - logger.critical( - '%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr - ) + logger.critical('%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr) sys.exit(3) attr = getattr(plugin, plugin_attr) if not isinstance(attr, 
plugin_attr_type): @@ -152,9 +136,7 @@ def load_plugin(plugin_module_name, external): sys.exit(3) for plugin_attr, plugin_attr_type in optional_attrs: - if not hasattr(plugin, plugin_attr) or not isinstance( - getattr(plugin, plugin_attr), plugin_attr_type - ): + if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type): setattr(plugin, plugin_attr, plugin_attr_type()) if not hasattr(plugin, "preference_section"): @@ -164,19 +146,12 @@ def load_plugin(plugin_module_name, external): if plugin.preference_section == "query": for plugin_attr in ("query_keywords", "query_examples"): if not hasattr(plugin, plugin_attr): - logger.critical( - 'missing attribute "{0}", cannot load plugin: {1}'.format( - plugin_attr, plugin - ) - ) + logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin)) sys.exit(3) if settings.get("enabled_plugins"): # searx compatibility: plugin.name in settings['enabled_plugins'] - plugin.default_on = ( - plugin.name in settings["enabled_plugins"] - or plugin.id in settings["enabled_plugins"] - ) + plugin.default_on = plugin.name in settings["enabled_plugins"] or plugin.id in settings["enabled_plugins"] # copy ressources if this is an external plugin if external: @@ -193,9 +168,7 @@ def load_and_initialize_plugin(plugin_module_name, external, init_args): try: return plugin if plugin.init(*init_args) else None except Exception: # pylint: disable=broad-except - plugin.logger.exception( - "Exception while calling init, the plugin is disabled" - ) + plugin.logger.exception("Exception while calling init, the plugin is disabled") return None return plugin diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 2dcc01e05..54d28bc9a 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -39,7 +39,7 @@ def on_result(request, search, result): if doi and len(doi) < 50: for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'): if doi.endswith(suffix): - doi = doi[:-len(suffix)] + doi = doi[: -len(suffix)] result['url'] = get_doi_resolver(request.preferences) + doi result['parsed_url'] = urlparse(result['url']) return True diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py index 2a38cac78..48d537cee 100644 --- a/searx/plugins/search_on_category_select.py +++ b/searx/plugins/search_on_category_select.py @@ -15,9 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, <asciimoo@gmail.com> ''' from flask_babel import gettext + name = gettext('Search on category select') -description = gettext('Perform search immediately if a category selected. ' - 'Disable to select multiple categories. (JavaScript required)') +description = gettext( + 'Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)' +) default_on = True preference_section = 'ui' diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 053899483..29bd5ca5c 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
''' from flask_babel import gettext import re + name = gettext('Self Informations') description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') default_on = True diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 98ddddbcd..42c58e524 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -19,10 +19,12 @@ from flask_babel import gettext import re from urllib.parse import urlunparse, parse_qsl, urlencode -regexes = {re.compile(r'utm_[^&]+'), - re.compile(r'(wkey|wemail)[^&]*'), - re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), - re.compile(r'&$')} +regexes = { + re.compile(r'utm_[^&]+'), + re.compile(r'(wkey|wemail)[^&]*'), + re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), + re.compile(r'&$'), +} name = gettext('Tracker URL remover') description = gettext('Remove trackers arguments from the returned URL') diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py index 47b830c79..fb61d413b 100644 --- a/searx/plugins/vim_hotkeys.py +++ b/searx/plugins/vim_hotkeys.py @@ -1,9 +1,11 @@ from flask_babel import gettext name = gettext('Vim-like hotkeys') -description = gettext('Navigate search results with Vim-like hotkeys ' - '(JavaScript required). ' - 'Press "h" key on main or result page to get help.') +description = gettext( + 'Navigate search results with Vim-like hotkeys ' + '(JavaScript required). ' + 'Press "h" key on main or result page to get help.' +) default_on = False preference_section = 'ui' diff --git a/searx/preferences.py b/searx/preferences.py index 49f6ef202..2a9b0af0c 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -21,14 +21,12 @@ DOI_RESOLVERS = list(settings['doi_resolvers']) class MissingArgumentException(Exception): - """Exption from ``cls._post_init`` when a argument is missed. - """ + """Exption from ``cls._post_init`` when a argument is missed.""" class ValidationException(Exception): - """Exption from ``cls._post_init`` when configuration value is invalid. - """ + """Exption from ``cls._post_init`` when configuration value is invalid.""" class Setting: @@ -84,8 +82,7 @@ class EnumStringSetting(Setting): raise ValidationException('Invalid value: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" self._validate_selection(data) self.value = data @@ -104,8 +101,7 @@ class MultipleChoiceSetting(EnumStringSetting): self._validate_selections(self.value) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.value = [] return @@ -124,25 +120,23 @@ class MultipleChoiceSetting(EnumStringSetting): self.value.append(choice) def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) class SetSetting(Setting): - """Setting of values of type ``set`` (comma separated string) """ + """Setting of values of type ``set`` (comma separated string)""" + def _post_init(self): if not hasattr(self, 'values'): self.values = set() def get_value(self): - """Returns a string with comma separated values. 
- """ + """Returns a string with comma separated values.""" return ','.join(self.values) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.values = set() # pylint: disable=attribute-defined-outside-init return @@ -159,8 +153,7 @@ class SetSetting(Setting): self.values = set(elements) # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE) @@ -172,8 +165,7 @@ class SearchLanguageSetting(EnumStringSetting): raise ValidationException('Invalid language code: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data not in self.choices and data != self.value: # pylint: disable=no-member # hack to give some backwards compatibility with old language cookies data = str(data).replace('_', '-') @@ -199,8 +191,7 @@ class MapSetting(Setting): raise ValidationException('Invalid default value') def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" # pylint: disable=no-member if data not in self.map: raise ValidationException('Invalid choice: {0}'.format(data)) @@ -208,14 +199,13 @@ class MapSetting(Setting): self.key = data # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" if hasattr(self, 'key'): resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE) class SwitchableSetting(Setting): - """ Base class for settings that can be turned on && off""" + """Base class for settings that can be turned on && off""" def _post_init(self): self.disabled = set() @@ -244,7 +234,7 @@ class SwitchableSetting(Setting): items = self.transform_form_items(items) self.disabled = set() # pylint: disable=attribute-defined-outside-init - self.enabled = set() # pylint: disable=attribute-defined-outside-init + self.enabled = set() # pylint: disable=attribute-defined-outside-init for choice in self.choices: # pylint: disable=no-member if choice['default_on']: if choice['id'] in items: @@ -254,8 +244,7 @@ class SwitchableSetting(Setting): self.enabled.add(choice['id']) def save(self, resp): # pylint: disable=arguments-differ - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) @@ -289,7 +278,7 @@ class EnginesSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + return [item[len('engine_') :].replace('_', ' ').replace(' ', '__') for item in items] def transform_values(self, values): if len(values) == 1 and next(iter(values)) == '': @@ -315,7 +304,7 @@ class PluginsSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('plugin_'):] for item in 
items] + return [item[len('plugin_') :] for item in items] class Preferences: @@ -468,19 +457,18 @@ class Preferences: continue self.key_value_settings[user_setting_name].parse(user_setting) elif user_setting_name == 'disabled_engines': - self.engines.parse_cookie((input_data.get('disabled_engines', ''), - input_data.get('enabled_engines', ''))) + self.engines.parse_cookie( + (input_data.get('disabled_engines', ''), input_data.get('enabled_engines', '')) + ) elif user_setting_name == 'disabled_plugins': - self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), - input_data.get('enabled_plugins', ''))) + self.plugins.parse_cookie( + (input_data.get('disabled_plugins', ''), input_data.get('enabled_plugins', '')) + ) elif user_setting_name == 'tokens': self.tokens.parse(user_setting) - elif not any(user_setting_name.startswith(x) for x in [ - 'enabled_', - 'disabled_', - 'engine_', - 'category_', - 'plugin_']): + elif not any( + user_setting_name.startswith(x) for x in ['enabled_', 'disabled_', 'engine_', 'category_', 'plugin_'] + ): self.unknown_params[user_setting_name] = user_setting def parse_form(self, input_data): @@ -494,7 +482,7 @@ class Preferences: elif user_setting_name.startswith('engine_'): disabled_engines.append(user_setting_name) elif user_setting_name.startswith('category_'): - enabled_categories.append(user_setting_name[len('category_'):]) + enabled_categories.append(user_setting_name[len('category_') :]) elif user_setting_name.startswith('plugin_'): disabled_plugins.append(user_setting_name) elif user_setting_name == 'tokens': @@ -507,8 +495,7 @@ class Preferences: # cannot be used in case of engines or plugins def get_value(self, user_setting_name): - """Returns the value for ``user_setting_name`` - """ + """Returns the value for ``user_setting_name``""" ret_val = None if user_setting_name in self.key_value_settings: ret_val = self.key_value_settings[user_setting_name].get_value() @@ -517,8 +504,7 @@ class Preferences: return ret_val def save(self, resp): - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" for user_setting_name, user_setting in self.key_value_settings.items(): # pylint: disable=unnecessary-dict-index-lookup if self.key_value_settings[user_setting_name].locked: @@ -544,8 +530,7 @@ class Preferences: def is_locked(setting_name): - """Checks if a given setting name is locked by settings.yml - """ + """Checks if a given setting name is locked by settings.yml""" if 'preferences' not in settings: return False if 'lock' not in settings['preferences']: diff --git a/searx/query.py b/searx/query.py index 7f252e93f..b7f64fe82 100644 --- a/searx/query.py +++ b/searx/query.py @@ -40,7 +40,6 @@ class QueryPartParser(ABC): class TimeoutParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '<' @@ -70,7 +69,6 @@ class TimeoutParser(QueryPartParser): class LanguageParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == ':' @@ -92,11 +90,9 @@ class LanguageParser(QueryPartParser): # if correct language-code is found # set it as new search-language - if (value == lang_id - or value == lang_name - or value == english_name - or value.replace('-', ' ') == country)\ - and value not in self.raw_text_query.languages: + if ( + value == lang_id or value == lang_name or value == english_name or value.replace('-', ' ') == country + ) and value not in self.raw_text_query.languages: found = True lang_parts = lang_id.split('-') if len(lang_parts) == 2: @@ -152,7 +148,6 @@ 
class LanguageParser(QueryPartParser): class ExternalBangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value.startswith('!!') @@ -180,7 +175,6 @@ class ExternalBangParser(QueryPartParser): class BangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '!' or raw_value[0] == '?' @@ -208,9 +202,11 @@ class BangParser(QueryPartParser): if value in categories: # using all engines for that search, which # are declared under that categorie name - self.raw_text_query.enginerefs.extend(EngineRef(engine.name, value) - for engine in categories[value] - if (engine.name, value) not in self.raw_text_query.disabled_engines) + self.raw_text_query.enginerefs.extend( + EngineRef(engine.name, value) + for engine in categories[value] + if (engine.name, value) not in self.raw_text_query.disabled_engines + ) return True return False @@ -246,7 +242,7 @@ class RawTextQuery: TimeoutParser, # this force the timeout LanguageParser, # this force a language ExternalBangParser, # external bang (must be before BangParser) - BangParser # this force a engine or category + BangParser, # this force a engine or category ] def __init__(self, query, disabled_engines): @@ -281,8 +277,7 @@ class RawTextQuery: for i, query_part in enumerate(raw_query_parts): # part does only contain spaces, skip - if query_part.isspace()\ - or query_part == '': + if query_part.isspace() or query_part == '': continue # parse special commands @@ -324,14 +319,16 @@ class RawTextQuery: return self.getFullQuery() def __repr__(self): - return f"<{self.__class__.__name__} " \ - + f"query={self.query!r} " \ - + f"disabled_engines={self.disabled_engines!r}\n " \ - + f"languages={self.languages!r} " \ - + f"timeout_limit={self.timeout_limit!r} "\ - + f"external_bang={self.external_bang!r} " \ - + f"specific={self.specific!r} " \ - + f"enginerefs={self.enginerefs!r}\n " \ - + f"autocomplete_list={self.autocomplete_list!r}\n " \ - + f"query_parts={self.query_parts!r}\n " \ - + f"user_query_parts={self.user_query_parts!r} >" + return ( + f"<{self.__class__.__name__} " + + f"query={self.query!r} " + + f"disabled_engines={self.disabled_engines!r}\n " + + f"languages={self.languages!r} " + + f"timeout_limit={self.timeout_limit!r} " + + f"external_bang={self.external_bang!r} " + + f"specific={self.specific!r} " + + f"enginerefs={self.enginerefs!r}\n " + + f"autocomplete_list={self.autocomplete_list!r}\n " + + f"query_parts={self.query_parts!r}\n " + + f"user_query_parts={self.user_query_parts!r} >" + ) diff --git a/searx/results.py b/searx/results.py index 10a26aa3f..6ab751c56 100644 --- a/searx/results.py +++ b/searx/results.py @@ -47,12 +47,8 @@ def compare_urls(url_a, url_b): return False # remove / from the end of the url if required - path_a = url_a.path[:-1]\ - if url_a.path.endswith('/')\ - else url_a.path - path_b = url_b.path[:-1]\ - if url_b.path.endswith('/')\ - else url_b.path + path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path + path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path return unquote(path_a) == unquote(path_b) @@ -83,8 +79,9 @@ def merge_two_infoboxes(infobox1, infobox2): parsed_url2 = urlparse(url2.get('url', '')) entity_url2 = url2.get('entity') for url1 in urls1: - if (entity_url2 is not None and url1.get('entity') == entity_url2)\ - or compare_urls(urlparse(url1.get('url', '')), parsed_url2): + if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls( + urlparse(url1.get('url', '')), parsed_url2 + ): unique_url = 
False break if unique_url: @@ -115,8 +112,7 @@ def merge_two_infoboxes(infobox1, infobox2): attributeSet.add(entity) for attribute in infobox2.get('attributes', []): - if attribute.get('label') not in attributeSet\ - and attribute.get('entity') not in attributeSet: + if attribute.get('label') not in attributeSet and attribute.get('entity') not in attributeSet: attributes1.append(attribute) if 'content' in infobox2: @@ -144,9 +140,22 @@ def result_score(result): class ResultContainer: """docstring for ResultContainer""" - __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ - '_closed', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data', 'on_result',\ - '_lock' + __slots__ = ( + '_merged_results', + 'infoboxes', + 'suggestions', + 'answers', + 'corrections', + '_number_of_results', + '_closed', + 'paging', + 'unresponsive_engines', + 'timings', + 'redirect_url', + 'engine_data', + 'on_result', + '_lock', + ) def __init__(self): super().__init__() @@ -208,8 +217,7 @@ class ResultContainer: if engine_name in engines: histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count') - if not self.paging and standard_result_count > 0 and engine_name in engines\ - and engines[engine_name].paging: + if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging: self.paging = True def _merge_infobox(self, infobox): @@ -248,8 +256,7 @@ class ResultContainer: return True def _normalize_url_result(self, result): - """Return True if the result is valid - """ + """Return True if the result is valid""" result['parsed_url'] = urlparse(result['url']) # if the result has no scheme, use http as default @@ -280,8 +287,9 @@ class ResultContainer: for merged_result in self._merged_results: if 'parsed_url' not in merged_result: continue - if compare_urls(result['parsed_url'], merged_result['parsed_url'])\ - and result_template == merged_result.get('template'): + if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get( + 'template' + ): if result_template != 'images.html': # not an image, same template, same url : it's a duplicate return merged_result @@ -294,8 +302,7 @@ class ResultContainer: def __merge_duplicated_http_result(self, duplicated, result, position): # using content with more text - if result_content_len(result.get('content', '')) >\ - result_content_len(duplicated.get('content', '')): + if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): duplicated['content'] = result['content'] # merge all result's parameters not found in duplicate @@ -341,18 +348,20 @@ class ResultContainer: res['category'] = engine.categories[0] if len(engine.categories) > 0 else '' # FIXME : handle more than one category per engine - category = res['category']\ - + ':' + res.get('template', '')\ - + ':' + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + category = ( + res['category'] + + ':' + + res.get('template', '') + + ':' + + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + ) - current = None if category not in categoryPositions\ - else categoryPositions[category] + current = None if category not in categoryPositions else categoryPositions[category] # group with previous results using the same category # if the group can accept more result and is not too far # from the current position - if current is not None and (current['count'] > 0)\ - and 
(len(gresults) - current['index'] < 20): + if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20): # group with the previous results using # the same category with this one index = current['index'] diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 0a3c5b3ac..d66f3362d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -123,8 +123,11 @@ class Search: # Max & user query: From user query except if above max actual_timeout = min(query_timeout, max_request_timeout) - logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) + logger.debug( + "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( + actual_timeout, default_timeout, query_timeout, max_request_timeout + ) + ) return requests, actual_timeout diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 4ce4ca76b..1311288f3 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -37,12 +37,12 @@ else: stdout = io.TextIOWrapper( # pylint: disable=consider-using-with open(sys.stdout.fileno(), 'wb', 0), - write_through=True + write_through=True, ) stderr = io.TextIOWrapper( # pylint: disable=consider-using-with - open(sys.stderr.fileno(), 'wb', 0) - , write_through=True + open(sys.stderr.fileno(), 'wb', 0), + write_through=True, ) @@ -91,12 +91,21 @@ def run(engine_name_list, verbose): # call by setup.py def main(): parser = argparse.ArgumentParser(description='Check searx engines.') - parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', - help='engines name or shortcut list. Empty for all engines.') - parser.add_argument('--verbose', '-v', - action='store_true', dest='verbose', - help='Display details about the test results', - default=False) + parser.add_argument( + 'engine_name_list', + metavar='engine name', + type=str, + nargs='*', + help='engines name or shortcut list. 
Empty for all engines.', + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + dest='verbose', + help='Display details about the test results', + default=False, + ) args = parser.parse_args() run(args.engine_name_list, args.verbose) diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index d9f11a71c..ff005dd91 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -23,10 +23,12 @@ running = threading.Lock() def _get_interval(every, error_msg): if isinstance(every, int): every = (every, every) - if not isinstance(every, (tuple, list))\ - or len(every) != 2\ - or not isinstance(every[0], int)\ - or not isinstance(every[1], int): + if ( + not isinstance(every, (tuple, list)) + or len(every) != 2 + or not isinstance(every[0], int) + or not isinstance(every[1], int) + ): raise SearxSettingsException(error_msg, None) return every @@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True): def run(): - if not running.acquire(blocking=False): # pylint: disable=consider-using-with + if not running.acquire(blocking=False): # pylint: disable=consider-using-with return try: logger.info('Starting checker') - result = { - 'status': 'ok', - 'engines': {} - } + result = {'status': 'ok', 'engines': {}} for name, processor in PROCESSORS.items(): logger.debug('Checking %s engine', name) checker = Checker(processor) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index e68248c0e..c0dd966d0 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -74,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool: try: # use "image_proxy" (avoid HTTP/2) network.set_context_network_name('image_proxy') - stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={ - 'User-Agent': gen_useragent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US;q=0.5,en;q=0.3', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-GPC': '1', - 'Cache-Control': 'max-age=0' - }) + stream = network.stream( + 'GET', + image_url, + timeout=10.0, + allow_redirects=True, + headers={ + 'User-Agent': gen_useragent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + }, + ) r = next(stream) r.close() if r.status_code == 200: @@ -104,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool: def _is_url_image(image_url) -> bool: - """Normalize image_url - """ + """Normalize image_url""" if not isinstance(image_url, str): return False @@ -131,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. 
} -def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ - -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: +def _search_query_diff( + sq1: SearchQuery, sq2: SearchQuery +) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: param1 = _search_query_to_dict(sq1) param2 = _search_query_to_dict(sq2) common = {} @@ -182,11 +188,9 @@ class ResultContainerTests: __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' - def __init__(self, - test_results: TestResults, - test_name: str, - search_query: SearchQuery, - result_container: ResultContainer): + def __init__( + self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer + ): self.test_name = test_name self.search_query = search_query self.result_container = result_container @@ -326,10 +330,9 @@ class CheckerTests: __slots__ = 'test_results', 'test_name', 'result_container_tests_list' - def __init__(self, - test_results: TestResults, - test_name: str, - result_container_tests_list: typing.List[ResultContainerTests]): + def __init__( + self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests] + ): self.test_results = test_results self.test_name = test_name self.result_container_tests_list = result_container_tests_list @@ -342,14 +345,17 @@ class CheckerTests: for i, urls_i in enumerate(urls_list): for j, urls_j in enumerate(urls_list): if i < j and urls_i == urls_j: - common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, - self.result_container_tests_list[j].search_query) + common, diff = _search_query_diff( + self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query, + ) common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) - diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) - diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) - self.test_results.add_error(self.test_name, - 'results are identitical for {} and {} ({})' - .format(diff1_str, diff2_str, common_str)) + diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error( + self.test_name, + 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str), + ) class Checker: @@ -395,9 +401,10 @@ class Checker: elif isinstance(method, types.FunctionType): method(*args) else: - self.test_results.add_error(obj.test_name, - 'method {!r} ({}) not found for {}' - .format(method, method.__class__.__name__, obj.__class__.__name__)) + self.test_results.add_error( + obj.test_name, + 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__), + ) def call_tests(self, obj, test_descriptions): for test_description in test_descriptions: diff --git a/searx/search/models.py b/searx/search/models.py index e48cb3611..ff5897966 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -25,19 +25,30 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang', 'engine_data' - - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: 
str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[typing.Dict[str, str]]=None): + __slots__ = ( + 'query', + 'engineref_list', + 'lang', + 'safesearch', + 'pageno', + 'time_range', + 'timeout_limit', + 'external_bang', + 'engine_data', + ) + + def __init__( + self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str = 'all', + safesearch: int = 0, + pageno: int = 1, + time_range: typing.Optional[str] = None, + timeout_limit: typing.Optional[float] = None, + external_bang: typing.Optional[str] = None, + engine_data: typing.Optional[typing.Dict[str, str]] = None, + ): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -53,20 +64,39 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + self.query, + self.engineref_list, + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ + return ( + self.query == other.query + and self.engineref_list == other.engineref_list + and self.lang == other.lang + and self.safesearch == other.safesearch + and self.pageno == other.pageno + and self.time_range == other.time_range + and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang + ) def __hash__(self): - return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, - self.timeout_limit, self.external_bang)) + return hash( + ( + self.query, + tuple(self.engineref_list), + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) + ) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 8108f8dfa..966b990ec 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -29,6 +29,7 @@ logger = logger.getChild('search.processors') PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" + def get_processor_class(engine_type): """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index b5fa063fd..732b55d52 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} + class SuspendedStatus: """Class to handle suspend state.""" @@ -39,8 +40,10 @@ class SuspendedStatus: # update continuous_errors / suspend_end_time self.continuous_errors += 1 if suspended_time is None: - suspended_time = 
min(settings['search']['max_ban_time_on_fail'], - self.continuous_errors * settings['search']['ban_time_on_fail']) + suspended_time = min( + settings['search']['max_ban_time_on_fail'], + self.continuous_errors * settings['search']['ban_time_on_fail'], + ) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason logger.debug('Suspend for %i seconds', suspended_time) @@ -127,9 +130,9 @@ class EngineProcessor(ABC): def extend_container_if_suspended(self, result_container): if self.suspended_status.is_suspended: - result_container.add_unresponsive_engine(self.engine_name, - self.suspended_status.suspend_reason, - suspended=True) + result_container.add_unresponsive_engine( + self.engine_name, self.suspended_status.suspend_reason, suspended=True + ) return True return False diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ec7a4a36e..13f077cb1 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index 674ba9c8e..8d8275df1 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -66,10 +66,7 @@ class OnlineProcessor(EngineProcessor): # create dictionary which contain all # informations about the request request_args = dict( - headers=params['headers'], - cookies=params['cookies'], - verify=params['verify'], - auth=params['auth'] + headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth'] ) # max_redirects @@ -105,10 +102,12 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error(self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), - (status_code, reason, hostname), - secondary=True) + count_error( + self.engine_name, + '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + (status_code, reason, hostname), + secondary=True, + ) return response @@ -147,22 +146,16 @@ class OnlineProcessor(EngineProcessor): # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) self.logger.error( - "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e.__class__.__name__ + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e.__class__.__name__ ) ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) self.logger.exception( - "requests exception (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e + "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e ) ) except SearxEngineCaptchaException as e: @@ -188,10 +181,9 @@ class 
OnlineProcessor(EngineProcessor): if getattr(self.engine, 'paging', False): tests['paging'] = { - 'matrix': {'query': 'time', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if 'general' in self.engine.categories: # avoid documentation about HTML tags (<time> and <input type="time">) @@ -199,10 +191,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'time_range', False): tests['time_range'] = { - 'matrix': {'query': 'news', - 'time_range': (None, 'day')}, + 'matrix': {'query': 'news', 'time_range': (None, 'day')}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if getattr(self.engine, 'supported_languages', []): @@ -216,10 +207,6 @@ class OnlineProcessor(EngineProcessor): } if getattr(self.engine, 'safesearch', False): - tests['safesearch'] = { - 'matrix': {'query': 'porn', - 'safesearch': (0, 2)}, - 'test': ['unique_results'] - } + tests['safesearch'] = {'matrix': {'query': 'porn', 'safesearch': (0, 2)}, 'test': ['unique_results']} return tests diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 4e5c57264..6bd891b1d 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -12,11 +12,13 @@ from .online import OnlineProcessor parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) + def normalize_name(name): name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() + def name_to_iso4217(name): name = normalize_name(name) currency = CURRENCIES['names'].get(name, [name]) @@ -24,9 +26,11 @@ def name_to_iso4217(name): return currency return currency[0] + def iso4217_to_name(iso4217, language): return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) + class OnlineCurrencyProcessor(OnlineProcessor): """Processor class used by ``online_currency`` engines.""" diff --git a/searx/search/processors/online_dictionary.py b/searx/search/processors/online_dictionary.py index 72941d57a..3e7f6ed59 100644 --- a/searx/search/processors/online_dictionary.py +++ b/searx/search/processors/online_dictionary.py @@ -11,6 +11,7 @@ from .online import OnlineProcessor parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) + class OnlineDictionaryProcessor(OnlineProcessor): """Processor class used by ``online_dictionary`` engines.""" @@ -44,10 +45,9 @@ class OnlineDictionaryProcessor(OnlineProcessor): if getattr(self.engine, 'paging', False): tests['translation_paging'] = { - 'matrix': {'query': 'en-es house', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'en-es house', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty', ('one_title_contains', 'house')], - 'test': ['unique_results'] + 'test': ['unique_results'], } else: tests['translation'] = { diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py index db020db7d..9c4711bfc 100644 --- a/searx/settings_defaults.py +++ b/searx/settings_defaults.py @@ -53,29 +53,24 @@ SEARX_ENVIRON_VARIABLES = { } - class SettingsValue: - """Check and update a setting value - """ - - def __init__(self, - type_definition: typing.Union[None, typing.Any, typing.Tuple[typing.Any]]=None, - default: typing.Any=None, - environ_name: str=None): + """Check and update a setting value""" + + def __init__( + self, + type_definition: typing.Union[None, typing.Any, 
typing.Tuple[typing.Any]] = None, + default: typing.Any = None, + environ_name: str = None, + ): self.type_definition = ( - type_definition - if type_definition is None or isinstance(type_definition, tuple) - else (type_definition,) + type_definition if type_definition is None or isinstance(type_definition, tuple) else (type_definition,) ) self.default = default self.environ_name = environ_name @property def type_definition_repr(self): - types_str = [ - t.__name__ if isinstance(t, type) else repr(t) - for t in self.type_definition - ] + types_str = [t.__name__ if isinstance(t, type) else repr(t) for t in self.type_definition] return ', '.join(types_str) def check_type_definition(self, value: typing.Any) -> None: @@ -83,9 +78,7 @@ class SettingsValue: return type_list = tuple(t for t in self.type_definition if isinstance(t, type)) if not isinstance(value, type_list): - raise ValueError( - 'The value has to be one of these types/values: {}'.format( - self.type_definition_repr)) + raise ValueError('The value has to be one of these types/values: {}'.format(self.type_definition_repr)) def __call__(self, value: typing.Any) -> typing.Any: if value == _UNDEFINED: @@ -101,8 +94,7 @@ class SettingsValue: class SettingSublistValue(SettingsValue): - """Check the value is a sublist of type definition. - """ + """Check the value is a sublist of type definition.""" def check_type_definition(self, value: typing.Any) -> typing.Any: if not isinstance(value, list): @@ -111,9 +103,9 @@ class SettingSublistValue(SettingsValue): if not item in self.type_definition[0]: raise ValueError('{} not in {}'.format(item, self.type_definition)) + class SettingsDirectoryValue(SettingsValue): - """Check and update a setting value that is a directory path - """ + """Check and update a setting value that is a directory path""" def check_type_definition(self, value: typing.Any) -> typing.Any: super().check_type_definition(value) @@ -159,7 +151,7 @@ SCHEMA = { 'wiki_url': SettingsValue(str, 'https://github.com/searxng/searxng/wiki'), }, 'search': { - 'safe_search': SettingsValue((0,1,2), 0), + 'safe_search': SettingsValue((0, 1, 2), 0), 'autocomplete': SettingsValue(str, ''), 'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''), 'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES), @@ -168,7 +160,7 @@ SCHEMA = { 'formats': SettingsValue(list, OUTPUT_FORMATS), }, 'server': { - 'port': SettingsValue((int,str), 8888, 'SEARXNG_PORT'), + 'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'), 'bind_address': SettingsValue(str, '127.0.0.1', 'SEARXNG_BIND_ADDRESS'), 'secret_key': SettingsValue(str, environ_name='SEARXNG_SECRET'), 'base_url': SettingsValue((False, str), False), @@ -213,8 +205,7 @@ SCHEMA = { # Tor configuration 'using_tor_proxy': SettingsValue(bool, False), 'extra_proxy_timeout': SettingsValue(int, 0), - 'networks': { - }, + 'networks': {}, }, 'plugins': SettingsValue(list, []), 'enabled_plugins': SettingsValue((None, list), None), @@ -222,10 +213,10 @@ SCHEMA = { 'off_when_debug': SettingsValue(bool, True), }, 'engines': SettingsValue(list, []), - 'doi_resolvers': { - }, + 'doi_resolvers': {}, } + def settings_set_defaults(settings): # compatibility with searx variables for searx, searxng in SEARX_ENVIRON_VARIABLES.items(): diff --git a/searx/settings_loader.py b/searx/settings_loader.py index f688be8ba..14ca8b4aa 100644 --- a/searx/settings_loader.py +++ b/searx/settings_loader.py @@ -125,8 +125,7 @@ def load_settings(load_user_setttings=True): user_settings_path = get_user_settings_path() if 
user_settings_path is None or not load_user_setttings: # no user settings - return (load_yaml(default_settings_path), - 'load the default settings from {}'.format(default_settings_path)) + return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path)) # user settings user_settings = load_yaml(user_settings_path) @@ -134,10 +133,12 @@ def load_settings(load_user_setttings=True): # the user settings are merged with the default configuration default_settings = load_yaml(default_settings_path) update_settings(default_settings, user_settings) - return (default_settings, - 'merge the default settings ( {} ) and the user setttings ( {} )' - .format(default_settings_path, user_settings_path)) + return ( + default_settings, + 'merge the default settings ( {} ) and the user setttings ( {} )'.format( + default_settings_path, user_settings_path + ), + ) # the user settings, fully replace the default configuration - return (user_settings, - 'load the user settings from {}'.format(user_settings_path)) + return (user_settings, 'load the user settings from {}'.format(user_settings_path)) diff --git a/searx/shared/__init__.py b/searx/shared/__init__.py index cbe24d239..98c9a11c2 100644 --- a/searx/shared/__init__.py +++ b/searx/shared/__init__.py @@ -9,6 +9,7 @@ try: except: # no uwsgi from .shared_simple import SimpleSharedDict as SharedDict, schedule + logger.info('Use shared_simple implementation') else: try: @@ -17,15 +18,19 @@ else: raise Exception() except: # uwsgi.ini configuration problem: disable all scheduling - logger.error('uwsgi.ini configuration error, add this line to your uwsgi.ini\n' - 'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1') + logger.error( + 'uwsgi.ini configuration error, add this line to your uwsgi.ini\n' + 'cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1' + ) from .shared_simple import SimpleSharedDict as SharedDict def schedule(delay, func, *args): return False + else: # uwsgi from .shared_uwsgi import UwsgiCacheSharedDict as SharedDict, schedule + logger.info('Use shared_uwsgi implementation') storage = SharedDict() diff --git a/searx/shared/shared_abstract.py b/searx/shared/shared_abstract.py index b1c72aabe..b4b15bea6 100644 --- a/searx/shared/shared_abstract.py +++ b/searx/shared/shared_abstract.py @@ -3,7 +3,6 @@ from abc import ABC, abstractmethod class SharedDict(ABC): - @abstractmethod def get_int(self, key): pass diff --git a/searx/shared/shared_simple.py b/searx/shared/shared_simple.py index 48d8cb822..0bf13a2a6 100644 --- a/searx/shared/shared_simple.py +++ b/searx/shared/shared_simple.py @@ -7,7 +7,7 @@ from . 
import shared_abstract class SimpleSharedDict(shared_abstract.SharedDict): - __slots__ = 'd', + __slots__ = ('d',) def __init__(self): self.d = {} diff --git a/searx/shared/shared_uwsgi.py b/searx/shared/shared_uwsgi.py index a6dba9f59..592e24a4b 100644 --- a/searx/shared/shared_uwsgi.py +++ b/searx/shared/shared_uwsgi.py @@ -9,7 +9,6 @@ _last_signal = 10 class UwsgiCacheSharedDict(shared_abstract.SharedDict): - def get_int(self, key): value = uwsgi.cache_get(key) if value is None: diff --git a/searx/unixthreadname.py b/searx/unixthreadname.py index 0f1f54936..3c2a68917 100644 --- a/searx/unixthreadname.py +++ b/searx/unixthreadname.py @@ -11,10 +11,12 @@ except ImportError: pass else: import threading + old_thread_init = threading.Thread.__init__ def new_thread_init(self, *args, **kwargs): # pylint: disable=protected-access, disable=c-extension-no-member old_thread_init(self, *args, **kwargs) setproctitle.setthreadtitle(self._name) + threading.Thread.__init__ = new_thread_init diff --git a/searx/utils.py b/searx/utils.py index 163892e93..d44bb73ea 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -23,8 +23,7 @@ from searx import logger logger = logger.getChild('utils') -blocked_tags = ('script', - 'style') +blocked_tags = ('script', 'style') ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE) @@ -43,8 +42,8 @@ NOTSET = NotSetClass() def searx_useragent(): """Return the searx User Agent""" return 'searx/{searx_version} {suffix}'.format( - searx_version=VERSION_TAG, - suffix=settings['outgoing']['useragent_suffix']).strip() + searx_version=VERSION_TAG, suffix=settings['outgoing']['useragent_suffix'] + ).strip() def gen_useragent(os=None): @@ -60,7 +59,6 @@ class HTMLTextExtractorException(Exception): class HTMLTextExtractor(HTMLParser): # pylint: disable=W0223 # (see https://bugs.python.org/issue31844) - def __init__(self): HTMLParser.__init__(self) self.result = [] @@ -135,10 +133,10 @@ def html_to_text(html_str): def extract_text(xpath_results, allow_none=False): """Extract text from a lxml result - * if xpath_results is list, extract the text from each result and concat the list - * if xpath_results is a xml element, extract all the text node from it - ( text_content() method from lxml ) - * if xpath_results is a string element, then it's already done + * if xpath_results is list, extract the text from each result and concat the list + * if xpath_results is a xml element, extract all the text node from it + ( text_content() method from lxml ) + * if xpath_results is a string element, then it's already done """ if isinstance(xpath_results, list): # it's list of result : concat everything using recursive call @@ -148,9 +146,7 @@ def extract_text(xpath_results, allow_none=False): return result.strip() elif isinstance(xpath_results, ElementBase): # it's a element - text = html.tostring( - xpath_results, encoding='unicode', method='text', with_tail=False - ) + text = html.tostring(xpath_results, encoding='unicode', method='text', with_tail=False) text = text.strip().replace('\n', ' ') return ' '.join(text.split()) elif isinstance(xpath_results, (_ElementStringResult, _ElementUnicodeResult, str, Number, bool)): @@ -344,7 +340,7 @@ def is_valid_lang(lang): """ if isinstance(lang, bytes): lang = lang.decode() - is_abbr = (len(lang) == 2) + is_abbr = len(lang) == 2 lang = lang.lower() if is_abbr: for l in language_codes: diff --git a/searx/version.py b/searx/version.py index ac42834d9..9b3fcc35e 100644 --- 
a/searx/version.py +++ b/searx/version.py @@ -81,16 +81,12 @@ def get_git_version(): # add "-dirty" suffix if there are uncommited changes except searx/settings.yml try: - subprocess_run( - "git diff --quiet -- . ':!searx/settings.yml' ':!utils/brand.env'" - ) + subprocess_run("git diff --quiet -- . ':!searx/settings.yml' ':!utils/brand.env'") except subprocess.CalledProcessError as e: if e.returncode == 1: git_version += "-dirty" else: - logger.warning( - '"%s" returns an unexpected return code %i', e.returncode, e.cmd - ) + logger.warning('"%s" returns an unexpected return code %i', e.returncode, e.cmd) return git_version, tag_version @@ -125,9 +121,7 @@ VERSION_TAG = "{VERSION_TAG}" GIT_URL = "{GIT_URL}" GIT_BRANCH = "{GIT_BRANCH}" """ - with open( - os.path.join(os.path.dirname(__file__), "version_frozen.py"), - "w", encoding="utf8") as f: + with open(os.path.join(os.path.dirname(__file__), "version_frozen.py"), "w", encoding="utf8") as f: f.write(python_code) print(f"{f.name} created") else: diff --git a/searx/webadapter.py b/searx/webadapter.py index 58cbf1d90..4fd18cee9 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -15,8 +15,9 @@ def deduplicate_engineref_list(engineref_list: List[EngineRef]) -> List[EngineRe return list(engineref_dict.values()) -def validate_engineref_list(engineref_list: List[EngineRef], preferences: Preferences)\ - -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]: +def validate_engineref_list( + engineref_list: List[EngineRef], preferences: Preferences +) -> Tuple[List[EngineRef], List[EngineRef], List[EngineRef]]: """Validate query_engines according to the preferences Returns: @@ -154,9 +155,11 @@ def get_selected_categories(preferences: Preferences, form: Optional[Dict[str, s def get_engineref_from_category_list(category_list: List[str], disabled_engines: List[str]) -> List[EngineRef]: result = [] for categ in category_list: - result.extend(EngineRef(engine.name, categ) - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) + result.extend( + EngineRef(engine.name, categ) + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines + ) return result @@ -170,8 +173,11 @@ def parse_generic(preferences: Preferences, form: Dict[str, str], disabled_engin # parse the form only if the categories are not locked for pd_name, pd in form.items(): if pd_name == 'engines': - pd_engines = [EngineRef(engine_name, engines[engine_name].categories[0]) - for engine_name in map(str.strip, pd.split(',')) if engine_name in engines] + pd_engines = [ + EngineRef(engine_name, engines[engine_name].categories[0]) + for engine_name in map(str.strip, pd.split(',')) + if engine_name in engines + ] if pd_engines: query_engineref_list.extend(pd_engines) explicit_engine_list = True @@ -206,8 +212,9 @@ def parse_engine_data(form): return engine_data -def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str])\ - -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]: +def get_search_query_from_webapp( + preferences: Preferences, form: Dict[str, str] +) -> Tuple[SearchQuery, RawTextQuery, List[EngineRef], List[EngineRef]]: # no text for the query ? 
if not form.get('q'): raise SearxParameterException('q', '') @@ -239,12 +246,23 @@ def get_search_query_from_webapp(preferences: Preferences, form: Dict[str, str]) query_engineref_list = parse_generic(preferences, form, disabled_engines) query_engineref_list = deduplicate_engineref_list(query_engineref_list) - query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken =\ - validate_engineref_list(query_engineref_list, preferences) - - return (SearchQuery(query, query_engineref_list, query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout, external_bang=external_bang, - engine_data=engine_data), - raw_text_query, - query_engineref_list_unknown, - query_engineref_list_notoken) + query_engineref_list, query_engineref_list_unknown, query_engineref_list_notoken = validate_engineref_list( + query_engineref_list, preferences + ) + + return ( + SearchQuery( + query, + query_engineref_list, + query_lang, + query_safesearch, + query_pageno, + query_time_range, + query_timeout, + external_bang=external_bang, + engine_data=engine_data, + ), + raw_text_query, + query_engineref_list_unknown, + query_engineref_list_notoken, + ) diff --git a/searx/webapp.py b/searx/webapp.py index 7729eb538..a7812f181 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -146,11 +146,7 @@ STATS_SORT_PARAMETERS = { } # Flask app -app = Flask( - __name__, - static_folder=settings['ui']['static_path'], - template_folder=templates_path -) +app = Flask(__name__, static_folder=settings['ui']['static_path'], template_folder=templates_path) app.jinja_env.trim_blocks = True app.jinja_env.lstrip_blocks = True @@ -171,14 +167,10 @@ _category_names = ( gettext('news'), gettext('map'), gettext('onions'), - gettext('science') + gettext('science'), ) -_simple_style = ( - gettext('auto'), - gettext('light'), - gettext('dark') -) +_simple_style = (gettext('auto'), gettext('light'), gettext('dark')) # timeout_text = gettext('timeout') @@ -214,11 +206,15 @@ exception_classname_to_text = { # monkey patch for flask_babel.get_translations _flask_babel_get_translations = flask_babel.get_translations + + def _get_translations(): if has_request_context() and request.form.get('use-translation') == 'oc': babel_ext = flask_babel.current_app.extensions['babel'] return Translations.load(next(babel_ext.translation_directories), 'oc') return _flask_babel_get_translations() + + flask_babel.get_translations = _get_translations @@ -286,13 +282,10 @@ def code_highlighter(codelines, language=None): line_code_start = line # new codeblock is detected - if last_line is not None and\ - last_line + 1 != line: + if last_line is not None and last_line + 1 != line: # highlight last codepart - formatter = HtmlFormatter( - linenos='inline', linenostart=line_code_start, cssclass="code-highlight" - ) + formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") html_code = html_code + highlight(tmp_code, lexer, formatter) # reset conditions for next codepart @@ -355,16 +348,9 @@ def proxify(url): url_params = dict(mortyurl=url.encode()) if settings['result_proxy'].get('key'): - url_params['mortyhash'] = hmac.new( - settings['result_proxy']['key'], - url.encode(), - hashlib.sha256 - ).hexdigest() - - return '{0}?{1}'.format( - settings['result_proxy']['url'], - urlencode(url_params) - ) + url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], url.encode(), hashlib.sha256).hexdigest() + + return '{0}?{1}'.format(settings['result_proxy']['url'], urlencode(url_params)) def 
image_proxify(url): @@ -377,10 +363,12 @@ def image_proxify(url): if url.startswith('data:image/'): # 50 is an arbitrary number to get only the beginning of the image. - partial_base64 = url[len('data:image/'):50].split(';') - if len(partial_base64) == 2 \ - and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']\ - and partial_base64[1].startswith('base64,'): + partial_base64 = url[len('data:image/') : 50].split(';') + if ( + len(partial_base64) == 2 + and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp'] + and partial_base64[1].startswith('base64,') + ): return url return None @@ -389,8 +377,7 @@ def image_proxify(url): h = new_hmac(settings['server']['secret_key'], url.encode()) - return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url.encode(), h=h))) + return '{0}?{1}'.format(url_for('image_proxy'), urlencode(dict(url=url.encode(), h=h))) def get_translations(): @@ -412,7 +399,8 @@ def _get_enable_categories(all_categories): disabled_engines = request.preferences.engines.get_disabled() enabled_categories = set( # pylint: disable=consider-using-dict-items - category for engine_name in engines + category + for engine_name in engines for category in engines[engine_name].categories if (engine_name, category) not in disabled_engines ) @@ -423,10 +411,7 @@ def get_pretty_url(parsed_url): path = parsed_url.path path = path[:-1] if len(path) > 0 and path[-1] == '/' else path path = path.replace("/", " › ") - return [ - parsed_url.scheme + "://" + parsed_url.netloc, - path - ] + return [parsed_url.scheme + "://" + parsed_url.netloc, path] def render(template_name, override_theme=None, **kwargs): @@ -448,7 +433,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['categories'] = _get_enable_categories(kwargs['all_categories']) # i18n - kwargs['language_codes'] = [ l for l in languages if l[0] in settings['search']['languages'] ] + kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']] kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) locale = request.preferences.get_value('locale') @@ -458,12 +443,11 @@ def render(template_name, override_theme=None, **kwargs): kwargs['rtl'] = True if 'current_language' not in kwargs: kwargs['current_language'] = match_language( - request.preferences.get_value('language'), settings['search']['languages'] ) + request.preferences.get_value('language'), settings['search']['languages'] + ) # values from settings - kwargs['search_formats'] = [ - x for x in settings['search']['formats'] if x != 'html' - ] + kwargs['search_formats'] = [x for x in settings['search']['formats'] if x != 'html'] kwargs['instance_name'] = get_setting('general.instance_name') kwargs['searx_version'] = VERSION_STRING kwargs['searx_git_url'] = GIT_URL @@ -477,9 +461,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['proxify_results'] = settings.get('result_proxy', {}).get('proxify_results', True) kwargs['get_result_template'] = get_result_template kwargs['opensearch_url'] = ( - url_for('opensearch') - + '?' - + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) + url_for('opensearch') + '?' 
+ urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) ) # scripts from plugins @@ -495,8 +477,7 @@ def render(template_name, override_theme=None, **kwargs): kwargs['styles'].add(css) start_time = default_timer() - result = render_template( - '{}/{}'.format(kwargs['theme'], template_name), **kwargs) + result = render_template('{}/{}'.format(kwargs['theme'], template_name), **kwargs) request.render_time += default_timer() - start_time # pylint: disable=assigning-non-slot return result @@ -541,7 +522,7 @@ def pre_request(): # language is defined neither in settings nor in preferences # use browser headers if not preferences.get_value("language"): - language = _get_browser_language(request, settings['search']['languages']) + language = _get_browser_language(request, settings['search']['languages']) preferences.parse_dict({"language": language}) # locale is defined neither in settings nor in preferences @@ -555,8 +536,7 @@ def pre_request(): allowed_plugins = preferences.plugins.get_enabled() disabled_plugins = preferences.plugins.get_disabled() for plugin in plugins: - if ((plugin.default_on and plugin.id not in disabled_plugins) - or plugin.id in allowed_plugins): + if (plugin.default_on and plugin.id not in disabled_plugins) or plugin.id in allowed_plugins: request.user_plugins.append(plugin) @@ -573,17 +553,20 @@ def add_default_headers(response): @app.after_request def post_request(response): total_time = default_timer() - request.start_time - timings_all = ['total;dur=' + str(round(total_time * 1000, 3)), - 'render;dur=' + str(round(request.render_time * 1000, 3))] + timings_all = [ + 'total;dur=' + str(round(total_time * 1000, 3)), + 'render;dur=' + str(round(request.render_time * 1000, 3)), + ] if len(request.timings) > 0: timings = sorted(request.timings, key=lambda v: v['total']) timings_total = [ - 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) + 'total_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['total'] * 1000, 3)) for i, v in enumerate(timings) ] timings_load = [ 'load_' + str(i) + '_' + v['engine'] + ';dur=' + str(round(v['load'] * 1000, 3)) - for i, v in enumerate(timings) if v.get('load') + for i, v in enumerate(timings) + if v.get('load') ] timings_all = timings_all + timings_total + timings_load response.headers.add('Server-Timing', ', '.join(timings_all)) @@ -592,10 +575,7 @@ def post_request(response): def index_error(output_format, error_message): if output_format == 'json': - return Response( - json.dumps({'error': error_message}), - mimetype='application/json' - ) + return Response(json.dumps({'error': error_message}), mimetype='application/json') if output_format == 'csv': response = Response('', mimetype='application/csv') cont_disp = 'attachment;Filename=searx.csv' @@ -678,9 +658,7 @@ def search(): raw_text_query = None result_container = None try: - search_query, raw_text_query, _, _ = get_search_query_from_webapp( - request.preferences, request.form - ) + search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) # pylint: disable=redefined-outer-name @@ -736,10 +714,9 @@ def search(): if hours == 0: result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) else: - result['publishedDate'] = gettext( - '{hours} hour(s), {minutes} minute(s) ago').format( - hours=hours, minutes=minutes - ) + 
result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( + hours=hours, minutes=minutes + ) else: result['publishedDate'] = format_date(result['publishedDate']) @@ -752,11 +729,9 @@ def search(): 'corrections': list(result_container.corrections), 'infoboxes': result_container.infoboxes, 'suggestions': list(result_container.suggestions), - 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines) + 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines), } - response = json.dumps( - x, default = lambda item: list(item) if isinstance(item, set) else item - ) + response = json.dumps(x, default=lambda item: list(item) if isinstance(item, set) else item) return Response(response, mimetype='application/json') if output_format == 'csv': @@ -800,21 +775,17 @@ def search(): # suggestions: use RawTextQuery to get the suggestion URLs with the same bang suggestion_urls = list( map( - lambda suggestion: { - 'url': raw_text_query.changeQuery(suggestion).getFullQuery(), - 'title': suggestion - }, - result_container.suggestions - )) + lambda suggestion: {'url': raw_text_query.changeQuery(suggestion).getFullQuery(), 'title': suggestion}, + result_container.suggestions, + ) + ) correction_urls = list( map( - lambda correction: { - 'url': raw_text_query.changeQuery(correction).getFullQuery(), - 'title': correction - }, - result_container.corrections - )) + lambda correction: {'url': raw_text_query.changeQuery(correction).getFullQuery(), 'title': correction}, + result_container.corrections, + ) + ) return render( # fmt: off @@ -899,9 +870,7 @@ def autocompleter(): language = language.split('-')[0] # run autocompletion - raw_results = search_autocomplete( - request.preferences.get_value('autocomplete'), sug_prefix, language - ) + raw_results = search_autocomplete(request.preferences.get_value('autocomplete'), sug_prefix, language) for result in raw_results: # attention: this loop will change raw_text_query object and this is # the reason why the sug_prefix was stored before (see above) @@ -952,16 +921,11 @@ def preferences(): allowed_plugins = request.preferences.plugins.get_enabled() # stats for preferences page - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - ) - ) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) engines_by_category = {} - for c in categories: # pylint: disable=consider-using-dict-items + for c in categories: # pylint: disable=consider-using-dict-items engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines] # sort the engines alphabetically since the order in settings.yml is meaningless. 
list.sort(engines_by_category[c], key=lambda e: e.name) @@ -996,8 +960,9 @@ def preferences(): reliabilities = {} engine_errors = get_engine_errors(filtered_engines) checker_results = checker_get_result() - checker_results = checker_results['engines'] \ - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + checker_results = ( + checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + ) for _, e in filtered_engines.items(): checker_result = checker_results.get(e.name, {}) checker_success = checker_result.get('success', True) @@ -1089,10 +1054,7 @@ def _is_selected_language_supported(engine, preferences): # pylint: disable=red if language == 'all': return True x = match_language( - language, - getattr(engine, 'supported_languages', []), - getattr(engine, 'language_aliases', {}), - None + language, getattr(engine, 'supported_languages', []), getattr(engine, 'language_aliases', {}), None ) return bool(x) @@ -1121,15 +1083,9 @@ def image_proxy(): 'DNT': '1', } set_context_network_name('image_proxy') - resp, stream = http_stream( - method = 'GET', - url = url, - headers = request_headers - ) + resp, stream = http_stream(method='GET', url=url, headers=request_headers) content_length = resp.headers.get('Content-Length') - if (content_length - and content_length.isdigit() - and int(content_length) > maximum_size ): + if content_length and content_length.isdigit() and int(content_length) > maximum_size: return 'Max size', 400 if resp.status_code != 200: @@ -1165,15 +1121,8 @@ def image_proxy(): logger.debug('Exception while closing response', e) try: - headers = dict_subset( - resp.headers, - {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'} - ) - response = Response( - stream, - mimetype=resp.headers['Content-Type'], - headers=headers, - direct_passthrough=True) + headers = dict_subset(resp.headers, {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'}) + response = Response(stream, mimetype=resp.headers['Content-Type'], headers=headers, direct_passthrough=True) response.call_on_close(close_stream) return response except httpx.HTTPError: @@ -1189,11 +1138,11 @@ def engine_descriptions(): for engine, description in ENGINE_DESCRIPTIONS.get(locale, {}).items(): result[engine] = description for engine, description in result.items(): - if len(description) ==2 and description[1] == 'ref': + if len(description) == 2 and description[1] == 'ref': ref_engine, ref_lang = description[0].split(':') description = ENGINE_DESCRIPTIONS[ref_lang][ref_engine] if isinstance(description, str): - description = [ description, 'wikipedia' ] + description = [description, 'wikipedia'] result[engine] = description return jsonify(result) @@ -1204,11 +1153,7 @@ def stats(): sort_order = request.args.get('sort', default='name', type=str) selected_engine_name = request.args.get('engine', default=None, type=str) - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - )) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) if selected_engine_name: if selected_engine_name not in filtered_engines: selected_engine_name = None @@ -1217,8 +1162,7 @@ def stats(): checker_results = checker_get_result() checker_results = ( - checker_results['engines'] - if checker_results['status'] == 'ok' and 'engines' in checker_results else {} + checker_results['engines'] if checker_results['status'] == 'ok' and 'engines' in 
checker_results else {} ) engine_stats = get_engines_stats(filtered_engines) @@ -1256,11 +1200,7 @@ def stats(): @app.route('/stats/errors', methods=['GET']) def stats_errors(): - filtered_engines = dict( - filter( - lambda kv: (kv[0], request.preferences.validate_token(kv[1])), - engines.items() - )) + filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items())) result = get_engine_errors(filtered_engines) return jsonify(result) @@ -1273,13 +1213,16 @@ def stats_checker(): @app.route('/robots.txt', methods=['GET']) def robots(): - return Response("""User-agent: * + return Response( + """User-agent: * Allow: / Allow: /about Disallow: /stats Disallow: /preferences Disallow: /*?*q=* -""", mimetype='text/plain') +""", + mimetype='text/plain', + ) @app.route('/opensearch.xml', methods=['GET']) @@ -1293,34 +1236,21 @@ def opensearch(): if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: method = 'get' - ret = render( - 'opensearch.xml', - opensearch_method=method, - override_theme='__common__' - ) + ret = render('opensearch.xml', opensearch_method=method, override_theme='__common__') - resp = Response( - response = ret, - status = 200, - mimetype = "application/opensearchdescription+xml" - ) + resp = Response(response=ret, status=200, mimetype="application/opensearchdescription+xml") return resp @app.route('/favicon.ico') def favicon(): return send_from_directory( - os.path.join( - app.root_path, - settings['ui']['static_path'], - 'themes', - get_current_theme_name(), - 'img' - ), + os.path.join(app.root_path, settings['ui']['static_path'], 'themes', get_current_theme_name(), 'img'), 'favicon.png', - mimetype = 'image/vnd.microsoft.icon' + mimetype='image/vnd.microsoft.icon', ) + @app.route('/clear_cookies') def clear_cookies(): resp = make_response(redirect(url_for('index', _external=True))) @@ -1341,43 +1271,47 @@ def config(): if isinstance(engine.supported_languages, dict): supported_languages = list(engine.supported_languages.keys()) - _engines.append({ - 'name': name, - 'categories': engine.categories, - 'shortcut': engine.shortcut, - 'enabled': not engine.disabled, - 'paging': engine.paging, - 'language_support': engine.language_support, - 'supported_languages': supported_languages, - 'safesearch': engine.safesearch, - 'time_range_support': engine.time_range_support, - 'timeout': engine.timeout - }) + _engines.append( + { + 'name': name, + 'categories': engine.categories, + 'shortcut': engine.shortcut, + 'enabled': not engine.disabled, + 'paging': engine.paging, + 'language_support': engine.language_support, + 'supported_languages': supported_languages, + 'safesearch': engine.safesearch, + 'time_range_support': engine.time_range_support, + 'timeout': engine.timeout, + } + ) _plugins = [] for _ in plugins: _plugins.append({'name': _.name, 'enabled': _.default_on}) - return jsonify({ - 'categories': list(categories.keys()), - 'engines': _engines, - 'plugins': _plugins, - 'instance_name': settings['general']['instance_name'], - 'locales': LOCALE_NAMES, - 'default_locale': settings['ui']['default_locale'], - 'autocomplete': settings['search']['autocomplete'], - 'safe_search': settings['search']['safe_search'], - 'default_theme': settings['ui']['default_theme'], - 'version': VERSION_STRING, - 'brand': { - 'CONTACT_URL': get_setting('general.contact_url'), - 'GIT_URL': GIT_URL, - 'GIT_BRANCH': GIT_BRANCH, - 'DOCS_URL': get_setting('brand.docs_url'), - }, - 'doi_resolvers': list(settings['doi_resolvers'].keys()), - 
'default_doi_resolver': settings['default_doi_resolver'], - }) + return jsonify( + { + 'categories': list(categories.keys()), + 'engines': _engines, + 'plugins': _plugins, + 'instance_name': settings['general']['instance_name'], + 'locales': LOCALE_NAMES, + 'default_locale': settings['ui']['default_locale'], + 'autocomplete': settings['search']['autocomplete'], + 'safe_search': settings['search']['safe_search'], + 'default_theme': settings['ui']['default_theme'], + 'version': VERSION_STRING, + 'brand': { + 'CONTACT_URL': get_setting('general.contact_url'), + 'GIT_URL': GIT_URL, + 'GIT_BRANCH': GIT_BRANCH, + 'DOCS_URL': get_setting('brand.docs_url'), + }, + 'doi_resolvers': list(settings['doi_resolvers'].keys()), + 'default_doi_resolver': settings['default_doi_resolver'], + } + ) @app.errorhandler(404) @@ -1388,9 +1322,7 @@ def page_not_found(_e): # see https://flask.palletsprojects.com/en/1.1.x/cli/ # True if "FLASK_APP=searx/webapp.py FLASK_ENV=development flask run" flask_run_development = ( - os.environ.get("FLASK_APP") is not None - and os.environ.get("FLASK_ENV") == 'development' - and is_flask_run_cmdline() + os.environ.get("FLASK_APP") is not None and os.environ.get("FLASK_ENV") == 'development' and is_flask_run_cmdline() ) # True if reload feature is activated of werkzeug, False otherwise (including uwsgi, etc..) @@ -1399,30 +1331,23 @@ flask_run_development = ( werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__main__") # initialize the engines except on the first run of the werkzeug server. -if (not werkzeug_reloader - or (werkzeug_reloader - and os.environ.get("WERKZEUG_RUN_MAIN") == "true") ): +if not werkzeug_reloader or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): plugin_initialize(app) search_initialize(enable_checker=True, check_network=True) def run(): - logger.debug( - 'starting webserver on %s:%s', - settings['server']['bind_address'], - settings['server']['port'] - ) + logger.debug('starting webserver on %s:%s', settings['server']['bind_address'], settings['server']['port']) app.run( - debug = searx_debug, - use_debugger = searx_debug, - port = settings['server']['port'], - host = settings['server']['bind_address'], - threaded = True, - extra_files = [ - get_default_settings_path() - ], + debug=searx_debug, + use_debugger=searx_debug, + port=settings['server']['port'], + host=settings['server']['bind_address'], + threaded=True, + extra_files=[get_default_settings_path()], ) + application = app patch_application(app) diff --git a/searx/webutils.py b/searx/webutils.py index c27324908..737e5a82f 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -106,8 +106,7 @@ def highlight_content(content, query): if content.lower().find(query.lower()) > -1: query_regex = '({0})'.format(re.escape(query)) - content = re.sub(query_regex, '<span class="highlight">\\1</span>', - content, flags=re.I | re.U) + content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U) else: regex_parts = [] for chunk in query.split(): @@ -119,8 +118,7 @@ def highlight_content(content, query): else: regex_parts.append('{0}'.format(re.escape(chunk))) query_regex = '({0})'.format('|'.join(regex_parts)) - content = re.sub(query_regex, '<span class="highlight">\\1</span>', - content, flags=re.I | re.U) + content = re.sub(query_regex, '<span class="highlight">\\1</span>', content, flags=re.I | re.U) return content diff --git a/searxng_extra/standalone_searx.py b/searxng_extra/standalone_searx.py index 
de8a0d77f..9ac8c8af2 100755 --- a/searxng_extra/standalone_searx.py +++ b/searxng_extra/standalone_searx.py @@ -62,7 +62,7 @@ Example to run it from python: }, "suggestions": [...] } -""" # pylint: disable=line-too-long +""" # pylint: disable=line-too-long import argparse import sys @@ -80,7 +80,7 @@ EngineCategoriesVar = Optional[List[str]] def get_search_query( - args: argparse.Namespace, engine_categories: EngineCategoriesVar = None + args: argparse.Namespace, engine_categories: EngineCategoriesVar = None ) -> searx.search.SearchQuery: """Get search results for the query""" if engine_categories is None: @@ -94,14 +94,12 @@ def get_search_query( "categories": category, "pageno": str(args.pageno), "language": args.lang, - "time_range": args.timerange + "time_range": args.timerange, } - preferences = searx.preferences.Preferences( - ['oscar'], engine_categories, searx.engines.engines, []) + preferences = searx.preferences.Preferences(['oscar'], engine_categories, searx.engines.engines, []) preferences.key_value_settings['safesearch'].parse(args.safesearch) - search_query = searx.webadapter.get_search_query_from_webapp( - preferences, form)[0] + search_query = searx.webadapter.get_search_query_from_webapp(preferences, form)[0] return search_query @@ -143,14 +141,13 @@ def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: "suggestions": list(result_container.suggestions), "answers": list(result_container.answers), "paging": result_container.paging, - "results_number": result_container.results_number() + "results_number": result_container.results_number(), } return result_container_json def parse_argument( - args: Optional[List[str]]=None, - category_choices: EngineCategoriesVar=None + args: Optional[List[str]] = None, category_choices: EngineCategoriesVar = None ) -> argparse.Namespace: """Parse command line. 
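# A sketch for illustration only (hypothetical names, not from this patch):
# the signature rewrites in this file follow the PEP 8 rule, applied by the
# formatter, that a keyword default gets spaces around "=" only when the
# parameter is annotated, and that a wrapped call is joined onto one line
# whenever it fits the line-length limit.
from typing import List, Optional


def pick(items: List[str], fallback: Optional[str] = None) -> str:
    # annotated parameter   -> "fallback: Optional[str] = None"
    # plain keyword at the call site stays "fallback='x'" (no spaces)
    return items[0] if items else (fallback or '')


print(pick(['a', 'b'], fallback='none'))  # short enough, so a single line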
@@ -174,24 +171,23 @@ def parse_argument( if not category_choices: category_choices = list(searx.engines.categories.keys()) parser = argparse.ArgumentParser(description='Standalone searx.') - parser.add_argument('query', type=str, - help='Text query') - parser.add_argument('--category', type=str, nargs='?', - choices=category_choices, - default='general', - help='Search category') - parser.add_argument('--lang', type=str, nargs='?', default='all', - help='Search language') - parser.add_argument('--pageno', type=int, nargs='?', default=1, - help='Page number starting from 1') + parser.add_argument('query', type=str, help='Text query') parser.add_argument( - '--safesearch', type=str, nargs='?', - choices=['0', '1', '2'], default='0', - help='Safe content filter from none to strict') + '--category', type=str, nargs='?', choices=category_choices, default='general', help='Search category' + ) + parser.add_argument('--lang', type=str, nargs='?', default='all', help='Search language') + parser.add_argument('--pageno', type=int, nargs='?', default=1, help='Page number starting from 1') parser.add_argument( - '--timerange', type=str, - nargs='?', choices=['day', 'week', 'month', 'year'], - help='Filter by time range') + '--safesearch', + type=str, + nargs='?', + choices=['0', '1', '2'], + default='0', + help='Safe content filter from none to strict', + ) + parser.add_argument( + '--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range' + ) return parser.parse_args(args) @@ -206,6 +202,4 @@ if __name__ == '__main__': searx.search.initialize_processors(settings_engines) search_q = get_search_query(prog_args, engine_categories=engine_cs) res_dict = to_dict(search_q) - sys.stdout.write(dumps( - res_dict, sort_keys=True, indent=4, ensure_ascii=False, - default=json_serial)) + sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 93ac15aa5..3373e2455 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -85,9 +85,7 @@ def add_currency_label(db, label, iso4217, language): def wikidata_request_result_iterator(request): - result = wikidata.send_wikidata_query( - request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) - ) + result = wikidata.send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) if result is not None: for r in result['results']['bindings']: yield r @@ -151,5 +149,6 @@ def main(): with open(get_filename(), 'w', encoding='utf8') as f: json.dump(db, f, ensure_ascii=False, indent=4) + if __name__ == '__main__': main() diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 2e16e1029..51cfc7cc2 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -56,7 +56,8 @@ NOT_A_DESCRIPTION = [ SKIP_ENGINE_SOURCE = [ # fmt: off - ('gitlab', 'wikidata') # descriptions are about wikipedia disambiguation pages + ('gitlab', 'wikidata') + # descriptions are about wikipedia disambiguation pages # fmt: on ] @@ -94,10 +95,7 @@ def update_description(engine_name, lang, description, source, replace=True): def get_wikipedia_summary(lang, pageid): - params = { - 'language': lang.replace('_','-'), - 'headers': {} - } + params = {'language': lang.replace('_', '-'), 'headers': {}} 
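# Not part of the diff — a made-up example of the "magic trailing comma"
# behaviour that the add_argument() rewrites above demonstrate: a call or
# literal that fits the line length is collapsed onto one line, while a comma
# after the last element keeps it exploded, one item per line.
compact = {'language': 'en', 'headers': {}}  # fits, so it stays on one line

exploded = {
    'language': 'en',
    'headers': {},
    'timeout': 10,  # the comma after the last item preserves this layout
}
print(compact, exploded)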
searx.engines.engines['wikipedia'].request(pageid, params) try: response = searx.network.get(params['url'], headers=params['headers'], timeout=10) @@ -162,10 +160,7 @@ def initialize(): global IDS, WIKIPEDIA_LANGUAGES, LANGUAGES_SPARQL searx.search.initialize() wikipedia_engine = searx.engines.engines['wikipedia'] - WIKIPEDIA_LANGUAGES = { - language: wikipedia_engine.url_lang(language.replace('_', '-')) - for language in LANGUAGES - } + WIKIPEDIA_LANGUAGES = {language: wikipedia_engine.url_lang(language.replace('_', '-')) for language in LANGUAGES} WIKIPEDIA_LANGUAGES['nb_NO'] = 'no' LANGUAGES_SPARQL = ', '.join(f"'{l}'" for l in set(WIKIPEDIA_LANGUAGES.values())) for engine_name, engine in searx.engines.engines.items(): @@ -180,9 +175,7 @@ def initialize(): def fetch_wikidata_descriptions(): searx.network.set_timeout_for_thread(60) result = wikidata.send_wikidata_query( - SPARQL_DESCRIPTION - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + SPARQL_DESCRIPTION.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) ) if result is not None: for binding in result['results']['bindings']: @@ -197,9 +190,7 @@ def fetch_wikidata_descriptions(): def fetch_wikipedia_descriptions(): result = wikidata.send_wikidata_query( - SPARQL_WIKIPEDIA_ARTICLE - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + SPARQL_WIKIPEDIA_ARTICLE.replace('%IDS%', IDS).replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) ) if result is not None: for binding in result['results']['bindings']: @@ -226,9 +217,9 @@ def fetch_website_description(engine_name, website): # the front page can't be fetched: skip this engine return - wikipedia_languages_r = { V: K for K, V in WIKIPEDIA_LANGUAGES.items() } + wikipedia_languages_r = {V: K for K, V in WIKIPEDIA_LANGUAGES.items()} languages = ['en', 'es', 'pt', 'ru', 'tr', 'fr'] - languages = languages + [ l for l in LANGUAGES if l not in languages] + languages = languages + [l for l in LANGUAGES if l not in languages] previous_matched_lang = None previous_count = 0 @@ -281,9 +272,7 @@ def get_output(): * description (if source = "wikipedia") * [f"engine:lang", "ref"] (reference to another existing description) """ - output = { - locale: {} for locale in LOCALE_NAMES - } + output = {locale: {} for locale in LOCALE_NAMES} seen_descriptions = {} diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index 27882bb24..d5c6b585a 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -113,13 +113,13 @@ def parse_ddg_bangs(ddg_bangs): # only for the https protocol: "https://example.com" becomes "//example.com" if bang_url.startswith(HTTPS_COLON + '//'): - bang_url = bang_url[len(HTTPS_COLON):] + bang_url = bang_url[len(HTTPS_COLON) :] # - if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON):] in bang_urls: + if bang_url.startswith(HTTP_COLON + '//') and bang_url[len(HTTP_COLON) :] in bang_urls: # if the bang_url uses the http:// protocol, and the same URL exists in https:// # then reuse the https:// bang definition. 
(written //example.com) - bang_def_output = bang_urls[bang_url[len(HTTP_COLON):]] + bang_def_output = bang_urls[bang_url[len(HTTP_COLON) :]] else: # normal use case : new http:// URL or https:// URL (without "https:", see above) bang_rank = str(bang_definition['r']) @@ -151,9 +151,6 @@ def get_bangs_filename(): if __name__ == '__main__': bangs_url, bangs_version = get_bang_url() print(f'fetch bangs from {bangs_url}') - output = { - 'version': bangs_version, - 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url)) - } + output = {'version': bangs_version, 'trie': parse_ddg_bangs(fetch_ddg_bangs(bangs_url))} with open(get_bangs_filename(), 'w', encoding="utf8") as fp: json.dump(output, fp, ensure_ascii=False, indent=4) diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 3b536f296..750e955fd 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -40,7 +40,7 @@ def fetch_firefox_versions(): url = urlparse(urljoin(URL, link)) path = url.path if path.startswith(RELEASE_PATH): - version = path[len(RELEASE_PATH):-1] + version = path[len(RELEASE_PATH) : -1] if NORMAL_REGEX.match(version): versions.append(LooseVersion(version)) diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 2d7ffc104..526469342 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -31,8 +31,7 @@ def fetch_supported_languages(): for engine_name in names: if hasattr(engines[engine_name], 'fetch_supported_languages'): engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() - print("fetched %s languages from engine %s" % ( - len(engines_languages[engine_name]), engine_name)) + print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) if type(engines_languages[engine_name]) == list: engines_languages[engine_name] = sorted(engines_languages[engine_name]) @@ -60,8 +59,9 @@ def join_language_lists(engines_languages): # apply custom fixes if necessary if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values(): - lang_code = next(lc for lc, alias in engines[engine_name].language_aliases.items() - if lang_code == alias) + lang_code = next( + lc for lc, alias in engines[engine_name].language_aliases.items() if lang_code == alias + ) locale = get_locale(lang_code) @@ -85,10 +85,12 @@ def join_language_lists(engines_languages): english_name = None # add language to list - language_list[short_code] = {'name': language_name, - 'english_name': english_name, - 'counter': set(), - 'countries': dict()} + language_list[short_code] = { + 'name': language_name, + 'english_name': english_name, + 'counter': set(), + 'countries': dict(), + } # add language with country if not in list if lang_code != short_code and lang_code not in language_list[short_code]['countries']: @@ -97,8 +99,7 @@ def join_language_lists(engines_languages): # get country name from babel's Locale object country_name = locale.get_territory_name() - language_list[short_code]['countries'][lang_code] = {'country_name': country_name, - 'counter': set()} + language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()} # count engine for both language_country combination and language alone language_list[short_code]['counter'].add(engine_name) @@ -112,17 +113,23 @@ def join_language_lists(engines_languages): def filter_language_list(all_languages): 
min_engines_per_lang = 13 min_engines_per_country = 7 - main_engines = [engine_name for engine_name in engines.keys() - if 'general' in engines[engine_name].categories and - engines[engine_name].supported_languages and - not engines[engine_name].disabled] + main_engines = [ + engine_name + for engine_name in engines.keys() + if 'general' in engines[engine_name].categories + and engines[engine_name].supported_languages + and not engines[engine_name].disabled + ] # filter list to include only languages supported by most engines or all default general engines - filtered_languages = {code: lang for code, lang - in all_languages.items() - if (len(lang['counter']) >= min_engines_per_lang or - all(main_engine in lang['counter'] - for main_engine in main_engines))} + filtered_languages = { + code: lang + for code, lang in all_languages.items() + if ( + len(lang['counter']) >= min_engines_per_lang + or all(main_engine in lang['counter'] for main_engine in main_engines) + ) + } def _copy_lang_data(lang, country_name=None): new_dict = dict() @@ -176,22 +183,24 @@ def write_languages_file(languages): "# -*- coding: utf-8 -*-", "# list of language codes", "# this file is generated automatically by utils/fetch_languages.py", - "language_codes =" + "language_codes =", ) - language_codes = tuple([ - ( - code, - languages[code]['name'].split(' (')[0], - languages[code].get('country_name') or '', - languages[code].get('english_name') or '' - ) for code in sorted(languages) - ]) + language_codes = tuple( + [ + ( + code, + languages[code]['name'].split(' (')[0], + languages[code].get('country_name') or '', + languages[code].get('english_name') or '', + ) + for code in sorted(languages) + ] + ) with open(languages_file, 'w') as new_file: file_content = "{file_headers} \\\n{language_codes}".format( - file_headers='\n'.join(file_headers), - language_codes=pformat(language_codes, indent=4) + file_headers='\n'.join(file_headers), language_codes=pformat(language_codes, indent=4) ) new_file.write(file_content) new_file.close() diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 77c715ba7..2916cbff1 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -84,9 +84,8 @@ PRESET_KEYS = { ('internet_access', 'ssid'): {'en': 'Wi-Fi'}, } -INCLUDED_KEYS = { - ('addr', ) -} +INCLUDED_KEYS = {('addr',)} + def get_preset_keys(): results = collections.OrderedDict() @@ -97,6 +96,7 @@ def get_preset_keys(): r.setdefault('*', value) return results + def get_keys(): results = get_preset_keys() response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST) @@ -110,18 +110,16 @@ def get_keys(): # label for the key "contact.email" is "Email" # whatever the language r = results.setdefault('contact', {}) - r[keys[1]] = { - '*': { - 'en': keys[1] - } - } + r[keys[1]] = {'*': {'en': keys[1]}} continue if tuple(keys) in PRESET_KEYS: # skip presets (already set above) continue - if get_key_rank(':'.join(keys)) is None\ - and ':'.join(keys) not in VALUE_TO_LINK\ - and tuple(keys) not in INCLUDED_KEYS: + if ( + get_key_rank(':'.join(keys)) is None + and ':'.join(keys) not in VALUE_TO_LINK + and tuple(keys) not in INCLUDED_KEYS + ): # keep only keys that will be displayed by openstreetmap.py continue label = key['itemLabel']['value'].lower() @@ -160,6 +158,7 @@ def get_tags(): results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label) return results + def optimize_data_lang(translations): 
language_to_delete = [] # remove "zh-hk" entry if the value is the same as "zh" @@ -184,12 +183,14 @@ def optimize_data_lang(translations): for language in language_to_delete: del translations[language] + def optimize_tags(data): for v in data.values(): for translations in v.values(): optimize_data_lang(translations) return data + def optimize_keys(data): for k, v in data.items(): if k == '*': @@ -198,9 +199,11 @@ def optimize_keys(data): optimize_keys(v) return data + def get_osm_tags_filename(): return Path(searx_dir) / "data" / "osm_keys_tags.json" + if __name__ == '__main__': set_timeout_for_thread(60) diff --git a/searxng_extra/update/update_pygments.py b/searxng_extra/update/update_pygments.py index fee63faa5..68aaad0f7 100755 --- a/searxng_extra/update/update_pygments.py +++ b/searxng_extra/update/update_pygments.py @@ -120,7 +120,7 @@ def get_css(cssclass, style): css_text = HtmlFormatter(style=style).get_style_defs(cssclass) result += cssclass + RULE_CODE_LINENOS + '\n\n' for line in css_text.splitlines(): - if ' ' in line and not line.startswith(cssclass): + if ' ' in line and not line.startswith(cssclass): line = cssclass + ' ' + line result += line + '\n' return result diff --git a/tests/__init__.py b/tests/__init__.py index c823cec87..8399f0604 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -50,5 +50,6 @@ class SearxTestCase(aiounittest.AsyncTestCase): def cleanup_patch(): setattr(obj, attr, previous_value) + self.addCleanup(cleanup_patch) setattr(obj, attr, value) diff --git a/tests/robot/__main__.py b/tests/robot/__main__.py index d4d6642a9..758a521ee 100644 --- a/tests/robot/__main__.py +++ b/tests/robot/__main__.py @@ -16,7 +16,7 @@ import tests as searx_tests from tests.robot import test_webapp -class SearxRobotLayer(): +class SearxRobotLayer: """Searx Robot Test Layer""" def setUp(self): @@ -42,9 +42,7 @@ class SearxRobotLayer(): # run the server self.server = subprocess.Popen( # pylint: disable=consider-using-with - [exe, webapp], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT + [exe, webapp], stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) if hasattr(self.server.stdout, 'read1'): print(self.server.stdout.read1(1024).decode()) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index d1c97ec81..9094e836a 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -2,5 +2,4 @@ import os from os.path import dirname, sep, abspath # In unit tests the user settings from unit/settings/test_settings.yml are used. 
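# Hypothetical snippet, outside the patch: the hunk below merely joins a
# wrapped call onto a single physical line because it fits the configured
# line length (these diffs suggest a limit larger than the default 88).
import os

# before: the abspath(...) call was split over two lines with a hanging indent
# after:  it fits the limit, so it becomes one line
settings_path = os.path.abspath(os.path.dirname(__file__) + os.sep + 'settings' + os.sep + 'example.yml')
print(settings_path)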
-os.environ['SEARXNG_SETTINGS_PATH'] = abspath( - dirname(__file__) + sep + 'settings' + sep + 'test_settings.yml') +os.environ['SEARXNG_SETTINGS_PATH'] = abspath(dirname(__file__) + sep + 'settings' + sep + 'test_settings.yml') diff --git a/tests/unit/engines/test_command.py b/tests/unit/engines/test_command.py index d2bb123f7..7876dd55e 100644 --- a/tests/unit/engines/test_command.py +++ b/tests/unit/engines/test_command.py @@ -114,7 +114,6 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' 'template': 'key-value.html', 'level': 'DEBUG', }, - ], [ { @@ -136,7 +135,6 @@ INFO:werkzeug: * Debugger PIN: 299-578-362''' 'level': 'INFO', }, ], - ] for i in [0, 1]: @@ -171,7 +169,7 @@ commit ''' 'commit': '\w{40}', 'author': '[\w* ]* <\w*@?\w*\.?\w*>', 'date': 'Date: .*', - 'message': '\n\n.*$' + 'message': '\n\n.*$', } expected_results = [ { @@ -195,7 +193,6 @@ commit ''' 'message': '\n\nthird interesting message', 'template': 'key-value.html', }, - ] results = git_log_engine.search(''.encode('utf-8'), {'pageno': 1}) diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py index 287beeab4..e616ff025 100644 --- a/tests/unit/engines/test_xpath.py +++ b/tests/unit/engines/test_xpath.py @@ -6,7 +6,6 @@ from tests import SearxTestCase class TestXpathEngine(SearxTestCase): - def test_request(self): xpath.search_url = 'https://url.com/{query}' xpath.categories = [] diff --git a/tests/unit/network/test_network.py b/tests/unit/network/test_network.py index 02628760b..d25a0d77b 100644 --- a/tests/unit/network/test_network.py +++ b/tests/unit/network/test_network.py @@ -9,7 +9,6 @@ from tests import SearxTestCase class TestNetwork(SearxTestCase): - def setUp(self): initialize() @@ -51,23 +50,23 @@ class TestNetwork(SearxTestCase): network = Network(proxies='http://localhost:1337') self.assertEqual(next(network._proxies_cycle), (('all://', 'http://localhost:1337'),)) - network = Network(proxies={ - 'https': 'http://localhost:1337', - 'http': 'http://localhost:1338' - }) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - - network = Network(proxies={ - 'https': ['http://localhost:1337', 'http://localhost:1339'], - 'http': 'http://localhost:1338' - }) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338'))) - self.assertEqual(next(network._proxies_cycle), - (('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338'))) + network = Network(proxies={'https': 'http://localhost:1337', 'http': 'http://localhost:1338'}) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + + network = Network( + proxies={'https': ['http://localhost:1337', 'http://localhost:1339'], 'http': 'http://localhost:1338'} + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1337'), ('http://', 'http://localhost:1338')) + ) + self.assertEqual( + next(network._proxies_cycle), (('https://', 'http://localhost:1339'), ('http://', 'http://localhost:1338')) + ) with self.assertRaises(ValueError): Network(proxies=1) @@ -134,6 +133,7 @@ class TestNetworkRequestRetries(SearxTestCase): 
first = False return httpx.Response(status_code=403, text=TestNetworkRequestRetries.TEXT) return httpx.Response(status_code=200, text=TestNetworkRequestRetries.TEXT) + return get_response async def test_retries_ok(self): @@ -206,6 +206,7 @@ class TestNetworkStreamRetries(SearxTestCase): first = False raise httpx.RequestError('fake exception', request=None) return httpx.Response(status_code=200, text=TestNetworkStreamRetries.TEXT) + return stream async def test_retries_ok(self): diff --git a/tests/unit/test_answerers.py b/tests/unit/test_answerers.py index 1119b697d..73148f327 100644 --- a/tests/unit/test_answerers.py +++ b/tests/unit/test_answerers.py @@ -7,7 +7,6 @@ from tests import SearxTestCase class AnswererTest(SearxTestCase): - def test_unicode_input(self): query = Mock() unicode_payload = 'árvíztűrő tükörfúrógép' diff --git a/tests/unit/test_engines_init.py b/tests/unit/test_engines_init.py index dffeaf8e8..c72f5c8e5 100644 --- a/tests/unit/test_engines_init.py +++ b/tests/unit/test_engines_init.py @@ -3,15 +3,16 @@ from tests import SearxTestCase class TestEnginesInit(SearxTestCase): - @classmethod def tearDownClass(cls): settings['outgoing']['using_tor_proxy'] = False settings['outgoing']['extra_proxy_timeout'] = 0 def test_initialize_engines_default(self): - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}] + engine_list = [ + {'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 2) @@ -20,8 +21,10 @@ class TestEnginesInit(SearxTestCase): def test_initialize_engines_exclude_onions(self): settings['outgoing']['using_tor_proxy'] = False - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + engine_list = [ + {'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 1) @@ -31,9 +34,17 @@ class TestEnginesInit(SearxTestCase): def test_initialize_engines_include_onions(self): settings['outgoing']['using_tor_proxy'] = True settings['outgoing']['extra_proxy_timeout'] = 100.0 - engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general', - 'timeout': 20.0, 'onion_url': 'http://engine1.onion'}, - {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + engine_list = [ + { + 'engine': 'dummy', + 'name': 'engine1', + 'shortcut': 'e1', + 'categories': 'general', + 'timeout': 20.0, + 'onion_url': 'http://engine1.onion', + }, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}, + ] engines.load_engines(engine_list) self.assertEqual(len(engines.engines), 2) diff --git a/tests/unit/test_external_bangs.py b/tests/unit/test_external_bangs.py index 68b3b5a78..698ce36c6 100644 --- a/tests/unit/test_external_bangs.py +++ b/tests/unit/test_external_bangs.py @@ -18,9 +18,9 @@ TEST_DB = { 's': { 'on': 'season' + chr(2) + chr(1) + '0', 'capes': 'seascape' + chr(2) + chr(1) + '0', - } + }, }, - 'error': ['error in external_bangs.json'] + 'error': ['error in external_bangs.json'], } } @@ -57,7 +57,6 @@ class TestGetNode(SearxTestCase): class TestResolveBangDefinition(SearxTestCase): 
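# For illustration (invented class, not from this repository): the -/+ lines
# in these test modules drop the blank line that sat directly under a class
# header and rewrite "class Foo():" as "class Foo:".  The new style looks like:
class DemoPlugin:
    def __init__(self, plugin_id: str) -> None:
        self.plugin_id = plugin_id


print(DemoPlugin('demo').plugin_id)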
- def test_https(self): url, rank = resolve_bang_definition('//example.com/' + chr(2) + chr(1) + '42', 'query') self.assertEqual(url, 'https://example.com/query') @@ -70,7 +69,6 @@ class TestResolveBangDefinition(SearxTestCase): class TestGetBangDefinitionAndAutocomplete(SearxTestCase): - def test_found(self): bang_definition, new_autocomplete = get_bang_definition_and_autocomplete('exam', external_bangs_db=TEST_DB) self.assertEqual(bang_definition, TEST_DB['trie']['exam']['*']) @@ -103,7 +101,6 @@ class TestGetBangDefinitionAndAutocomplete(SearxTestCase): class TestExternalBangJson(SearxTestCase): - def test_no_external_bang_query(self): result = get_bang_url(SearchQuery('test', engineref_list=[EngineRef('wikipedia', 'general')])) self.assertEqual(result, None) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 5bad4e5c4..28df835e5 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -6,18 +6,16 @@ from tests import SearxTestCase def get_search_mock(query, **kwargs): - return Mock(search_query=Mock(query=query, **kwargs), - result_container=Mock(answers=dict())) + return Mock(search_query=Mock(query=query, **kwargs), result_container=Mock(answers=dict())) -class PluginMock(): +class PluginMock: default_on = False name = 'Default plugin' description = 'Default plugin description' class PluginStoreTest(SearxTestCase): - def test_PluginStore_init(self): store = plugins.PluginStore() self.assertTrue(isinstance(store.plugins, list) and len(store.plugins) == 0) @@ -44,7 +42,6 @@ class PluginStoreTest(SearxTestCase): class SelfIPTest(SearxTestCase): - def test_PluginStore_init(self): plugin = plugins.load_and_initialize_plugin('searx.plugins.self_info', False, (None, {})) store = plugins.PluginStore() @@ -93,7 +90,6 @@ class SelfIPTest(SearxTestCase): class HashPluginTest(SearxTestCase): - def test_PluginStore_init(self): store = plugins.PluginStore() plugin = plugins.load_and_initialize_plugin('searx.plugins.hash_plugin', False, (None, {})) @@ -107,8 +103,9 @@ class HashPluginTest(SearxTestCase): # MD5 search = get_search_mock(query='md5 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('md5 hash digest: 098f6bcd4621d373cade4e832627b4f6' - in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'md5 hash digest: 098f6bcd4621d373cade4e832627b4f6' in search.result_container.answers['hash']['answer'] + ) search = get_search_mock(query=b'md5 test', pageno=2) store.call(store.plugins, 'post_search', request, search) @@ -117,31 +114,41 @@ class HashPluginTest(SearxTestCase): # SHA1 search = get_search_mock(query='sha1 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha1 hash digest: a94a8fe5ccb19ba61c4c0873d391e9879' - '82fbbd3' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha1 hash digest: a94a8fe5ccb19ba61c4c0873d391e9879' + '82fbbd3' in search.result_container.answers['hash']['answer'] + ) # SHA224 search = get_search_mock(query='sha224 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha224 hash digest: 90a3ed9e32b2aaf4c61c410eb9254261' - '19e1a9dc53d4286ade99a809' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha224 hash digest: 90a3ed9e32b2aaf4c61c410eb9254261' + '19e1a9dc53d4286ade99a809' in search.result_container.answers['hash']['answer'] + ) # SHA256 search = get_search_mock(query='sha256 test', pageno=1) store.call(store.plugins, 
'post_search', request, search) - self.assertTrue('sha256 hash digest: 9f86d081884c7d659a2feaa0c55ad015a' - '3bf4f1b2b0b822cd15d6c15b0f00a08' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha256 hash digest: 9f86d081884c7d659a2feaa0c55ad015a' + '3bf4f1b2b0b822cd15d6c15b0f00a08' in search.result_container.answers['hash']['answer'] + ) # SHA384 search = get_search_mock(query='sha384 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha384 hash digest: 768412320f7b0aa5812fce428dc4706b3c' - 'ae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf1' - '7a0a9' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha384 hash digest: 768412320f7b0aa5812fce428dc4706b3c' + 'ae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf1' + '7a0a9' in search.result_container.answers['hash']['answer'] + ) # SHA512 search = get_search_mock(query='sha512 test', pageno=1) store.call(store.plugins, 'post_search', request, search) - self.assertTrue('sha512 hash digest: ee26b0dd4af7e749aa1a8ee3c10ae9923f6' - '18980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5' - 'fa9ad8e6f57f50028a8ff' in search.result_container.answers['hash']['answer']) + self.assertTrue( + 'sha512 hash digest: ee26b0dd4af7e749aa1a8ee3c10ae9923f6' + '18980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5' + 'fa9ad8e6f57f50028a8ff' in search.result_container.answers['hash']['answer'] + ) diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py index 903b9b54d..1ffed5c1a 100644 --- a/tests/unit/test_preferences.py +++ b/tests/unit/test_preferences.py @@ -1,10 +1,16 @@ -from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentException, SearchLanguageSetting, - MultipleChoiceSetting, PluginsSetting, ValidationException) +from searx.preferences import ( + EnumStringSetting, + MapSetting, + MissingArgumentException, + SearchLanguageSetting, + MultipleChoiceSetting, + PluginsSetting, + ValidationException, +) from tests import SearxTestCase class PluginStub: - def __init__(self, plugin_id, default_on): self.id = plugin_id self.default_on = default_on @@ -121,20 +127,23 @@ class TestSettings(SearxTestCase): class TestPreferences(SearxTestCase): - def test_encode(self): from searx.preferences import Preferences + pref = Preferences(['oscar'], ['general'], {}, []) - url_params = 'eJx1VMmO2zAM_Zr6YrTocujJh6JF0QEKzKAz7VVgJNohLIseUU7ivy-VcWy5yyGOTVGP73GLKJNPYjiYgGeT4NB8BS9YOSY' \ - 'TUdifMDYM-vmGY1d5CN0EHTYOK88W_PXNkcDBozOjnzoK0vyi4bWnHs2RU4-zvHr_-RF9a-5Cy3GARByy7X7EkKMoBeMp9CuPQ-SzYMx' \ - '8Vr9P1qKI-XJ_p1fOkRJWNCgVM0a-zAttmBJbHkaPSZlNts-_jiuBFgUh2mPztkpHHLBhsRArDHvm356eHh5vATS0Mqagr0ZsZO_V8hT' \ - 'B9srt54_v6jewJugqL4Nn_hYSdhxnI-jRpi05GDQCStOT7UGVmJY8ZnltRKyF23SGiLWjqNcygKGkpyeGZIywJfD1gI5AjRTAmBM55Aw' \ - 'Q0Tn626lj7jzWo4e5hnEsIlprX6dTgdBRpyRBFKTDgBF8AasVyT4gvSTEoXRpXWRyG3CYQYld65I_V6lboILTMAlZY65_ejRDcHgp0Tv' \ - 'EPtGAsqTiBf3m76g7pP9B84mwjPvuUtASRDei1nDF2ix_JXW91UJkXrPh6RAhznVmKyQl7dwJdMJ6bz1QOmgzYlrEzHDMcEUuo44AgS1' \ - 'CvkjaOb2Q2AyY5oGDTs_OLXE_c2I5cg9hk3kEJZ0fu4SuktsIA2RhuJwP86AdripThCBeO9uVUejyPGmFSxPrqEYcuWi25zOEXV9tc1m' \ - '_KP1nafYtdfv6Q9hKfWmGm9A_3G635UwiVndLGdFCiLWkONk0xUxGLGGweGWTa2nZYZ0fS1YKlE3Uuw8fPl52E5U8HJYbC7sbjXUsrnT' \ + url_params = ( + 'eJx1VMmO2zAM_Zr6YrTocujJh6JF0QEKzKAz7VVgJNohLIseUU7ivy-VcWy5yyGOTVGP73GLKJNPYjiYgGeT4NB8BS9YOSY' + 'TUdifMDYM-vmGY1d5CN0EHTYOK88W_PXNkcDBozOjnzoK0vyi4bWnHs2RU4-zvHr_-RF9a-5Cy3GARByy7X7EkKMoBeMp9CuPQ-SzYMx' + 
'8Vr9P1qKI-XJ_p1fOkRJWNCgVM0a-zAttmBJbHkaPSZlNts-_jiuBFgUh2mPztkpHHLBhsRArDHvm356eHh5vATS0Mqagr0ZsZO_V8hT' + 'B9srt54_v6jewJugqL4Nn_hYSdhxnI-jRpi05GDQCStOT7UGVmJY8ZnltRKyF23SGiLWjqNcygKGkpyeGZIywJfD1gI5AjRTAmBM55Aw' + 'Q0Tn626lj7jzWo4e5hnEsIlprX6dTgdBRpyRBFKTDgBF8AasVyT4gvSTEoXRpXWRyG3CYQYld65I_V6lboILTMAlZY65_ejRDcHgp0Tv' + 'EPtGAsqTiBf3m76g7pP9B84mwjPvuUtASRDei1nDF2ix_JXW91UJkXrPh6RAhznVmKyQl7dwJdMJ6bz1QOmgzYlrEzHDMcEUuo44AgS1' + 'CvkjaOb2Q2AyY5oGDTs_OLXE_c2I5cg9hk3kEJZ0fu4SuktsIA2RhuJwP86AdripThCBeO9uVUejyPGmFSxPrqEYcuWi25zOEXV9tc1m' + '_KP1nafYtdfv6Q9hKfWmGm9A_3G635UwiVndLGdFCiLWkONk0xUxGLGGweGWTa2nZYZ0fS1YKlE3Uuw8fPl52E5U8HJYbC7sbjXUsrnT' 'XHXRbELfO-1fGSqskiGnMK7B0dV3t8Lq08pbdtYpuVdoKWA2Yjuyah_vHp2rZWjo0zXL8Gw8DTj0=' + ) pref.parse_encoded_data(url_params) self.assertEqual( vars(pref.key_value_settings['categories']), - {'value': ['general'], 'locked': False, 'choices': ['general', 'none']}) + {'value': ['general'], 'locked': False, 'choices': ['general', 'none']}, + ) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index edb0a18f7..9a53f8f47 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -17,7 +17,6 @@ TEST_ENGINES = [ class TestQuery(SearxTestCase): - def test_simple_query(self): query_text = 'the query' query = RawTextQuery(query_text, []) @@ -58,7 +57,6 @@ class TestQuery(SearxTestCase): class TestLanguageParser(SearxTestCase): - def test_language_code(self): language = 'es-ES' query_text = 'the query' @@ -136,7 +134,6 @@ class TestLanguageParser(SearxTestCase): class TestTimeoutParser(SearxTestCase): - def test_timeout_below100(self): query_text = '<3 the query' query = RawTextQuery(query_text, []) @@ -189,7 +186,6 @@ class TestTimeoutParser(SearxTestCase): class TestExternalBangParser(SearxTestCase): - def test_external_bang(self): query_text = '!!ddg the query' query = RawTextQuery(query_text, []) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index aa99e78c7..113e9cd3c 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -4,10 +4,7 @@ from searx.results import ResultContainer from tests import SearxTestCase -def fake_result(url='https://aa.bb/cc?dd=ee#ff', - title='aaa', - content='bbb', - engine='wikipedia', **kwargs): +def fake_result(url='https://aa.bb/cc?dd=ee#ff', title='aaa', content='bbb', engine='wikipedia', **kwargs): result = { # fmt: off 'url': url, @@ -22,7 +19,6 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', # TODO class ResultContainerTestCase(SearxTestCase): - def test_empty(self): c = ResultContainer() self.assertEqual(c.get_ordered_results(), []) diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index c7f15a681..fa16947be 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -22,11 +22,11 @@ TEST_ENGINES = [ class SearchQueryTestCase(SearxTestCase): - def test_repr(self): s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g') - self.assertEqual(repr(s), - "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')") # noqa + self.assertEqual( + repr(s), "SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, '1', 5.0, 'g')" + ) # noqa def test_eq(self): s = SearchQuery('test', [EngineRef('bing', 'general')], 'all', 0, 1, None, None, None) @@ -36,64 +36,80 @@ class SearchQueryTestCase(SearxTestCase): class SearchTestCase(SearxTestCase): - @classmethod def setUpClass(cls): searx.search.initialize(TEST_ENGINES) def test_timeout_simple(self): 
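# An invented example, not taken from the hunks that follow, of the layout
# those hunks use when a call no longer fits on one line: the arguments are
# exploded one per line with a trailing comma instead of being wrapped with
# backslashes or hanging indents.
def search_query(query, engines, lang, safesearch, pageno, time_range, timeout, external_bang=None):
    return (query, engines, lang, safesearch, pageno, time_range, timeout, external_bang)


sq = search_query(
    'yes yes',
    ['engine1'],
    'en-US',
    0,
    1,
    None,
    None,
    external_bang='yt',
)
print(sq)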
settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_above_default_nomax(self): settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 5.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 3.0) def test_timeout_query_below_default_nomax(self): settings['outgoing']['max_request_timeout'] = None - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 1.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 1.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 1.0) def test_timeout_query_below_max(self): settings['outgoing']['max_request_timeout'] = 10.0 - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 5.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 5.0) def test_timeout_query_above_max(self): settings['outgoing']['max_request_timeout'] = 10.0 - search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, 15.0) + search_query = SearchQuery( + 'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 15.0 + ) search = searx.search.Search(search_query) search.search() self.assertEqual(search.actual_timeout, 10.0) def test_external_bang(self): - search_query = SearchQuery('yes yes', - [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None, - external_bang="yt") + search_query = SearchQuery( + 'yes yes', + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], + 'en-US', + SAFESEARCH, + PAGENO, + None, + None, + external_bang="yt", + ) search = searx.search.Search(search_query) results = search.search() # For checking if the user redirected with the youtube external bang self.assertTrue(results.redirect_url is not None) - search_query = SearchQuery('youtube never gonna give you up', - [EngineRef(PUBLIC_ENGINE_NAME, 'general')], - 'en-US', SAFESEARCH, PAGENO, None, None) + search_query = SearchQuery( + 'youtube never gonna give you up', + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], + 'en-US', + SAFESEARCH, + PAGENO, + None, + None, + ) search = searx.search.Search(search_query) results = search.search() diff --git a/tests/unit/test_settings_loader.py b/tests/unit/test_settings_loader.py index 9e04df653..13a2d4f37 100644 --- a/tests/unit/test_settings_loader.py +++ b/tests/unit/test_settings_loader.py @@ -12,7 +12,6 @@ test_dir = abspath(dirname(__file__)) class TestLoad(SearxTestCase): - def test_load_zero(self): with self.assertRaises(SearxSettingsException): settings_loader.load_yaml('/dev/zero') @@ -31,7 +30,6 
@@ class TestLoad(SearxTestCase): class TestDefaultSettings(SearxTestCase): - def test_load(self): settings, msg = settings_loader.load_settings(load_user_setttings=False) self.assertTrue(msg.startswith('load the default settings from')) @@ -46,7 +44,6 @@ class TestDefaultSettings(SearxTestCase): class TestUserSettings(SearxTestCase): - def test_is_use_default_settings(self): self.assertFalse(settings_loader.is_use_default_settings({})) self.assertTrue(settings_loader.is_use_default_settings({'use_default_settings': True})) @@ -57,23 +54,24 @@ class TestUserSettings(SearxTestCase): self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0})) def test_user_settings_not_found(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': '/dev/null'}): + with patch.dict(settings_loader.environ, {'SEARXNG_SETTINGS_PATH': '/dev/null'}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('load the default settings from')) self.assertEqual(settings['server']['secret_key'], "ultrasecretkey") def test_user_settings(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value") def test_user_settings_remove(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -84,8 +82,9 @@ class TestUserSettings(SearxTestCase): self.assertIn('wikipedia', engine_names) def test_user_settings_remove2(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -101,8 +100,9 @@ class TestUserSettings(SearxTestCase): self.assertEqual(newengine[0]['engine'], 'dummy') def test_user_settings_keep_only(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')}): + with patch.dict( + settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')} + ): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) engine_names = [engine['name'] for engine in settings['engines']] @@ -111,8 +111,9 @@ class TestUserSettings(SearxTestCase): self.assertEqual(len(settings['engines'][2]), 1) def test_custom_settings(self): - with patch.dict(settings_loader.environ, - {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')}): + with patch.dict( + 
settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')}
+        ):
             settings, msg = settings_loader.load_settings()
             self.assertTrue(msg.startswith('load the user settings from'))
             self.assertEqual(settings['server']['port'], 9000)
diff --git a/tests/unit/test_standalone_searx.py b/tests/unit/test_standalone_searx.py
index c81598160..a3d8b4d4f 100644
--- a/tests/unit/test_standalone_searx.py
+++ b/tests/unit/test_standalone_searx.py
@@ -23,8 +23,7 @@ class StandaloneSearx(SearxTestCase):
 
     def test_parse_argument_no_args(self):
         """Test parse argument without args."""
-        with patch.object(sys, 'argv', ['standalone_searx']), \
-                self.assertRaises(SystemExit):
+        with patch.object(sys, 'argv', ['standalone_searx']), self.assertRaises(SystemExit):
             sys.stderr = io.StringIO()
             sas.parse_argument()
             sys.stdout = sys.__stderr__
@@ -33,8 +32,13 @@ class StandaloneSearx(SearxTestCase):
         """Test parse argument with basic args."""
         query = 'red box'
         exp_dict = {
-            'query': query, 'category': 'general', 'lang': 'all', 'pageno': 1,
-            'safesearch': '0', 'timerange': None}
+            'query': query,
+            'category': 'general',
+            'lang': 'all',
+            'pageno': 1,
+            'safesearch': '0',
+            'timerange': None,
+        }
         args = ['standalone_searx', query]
         with patch.object(sys, 'argv', args):
             res = sas.parse_argument()
@@ -45,16 +49,16 @@ class StandaloneSearx(SearxTestCase):
     def test_to_dict(self):
         """test to_dict."""
         self.assertEqual(
-            sas.to_dict(
-                sas.get_search_query(sas.parse_argument(['red box']))),
+            sas.to_dict(sas.get_search_query(sas.parse_argument(['red box']))),
             {
-                'search': {
-                    'q': 'red box', 'pageno': 1, 'lang': 'all',
-                    'safesearch': 0, 'timerange': None
-                },
-                'results': [], 'infoboxes': [], 'suggestions': [],
-                'answers': [], 'paging': False, 'results_number': 0
-            }
+                'search': {'q': 'red box', 'pageno': 1, 'lang': 'all', 'safesearch': 0, 'timerange': None},
+                'results': [],
+                'infoboxes': [],
+                'suggestions': [],
+                'answers': [],
+                'paging': False,
+                'results_number': 0,
+            },
         )
 
     def test_to_dict_with_mock(self):
@@ -77,30 +81,28 @@ class StandaloneSearx(SearxTestCase):
                     'safesearch': m_sq.safesearch,
                     'timerange': m_sq.time_range,
                 },
-                'suggestions': []
-            }
+                'suggestions': [],
+            },
         )
 
     def test_get_search_query(self):
         """test get_search_query."""
-        args = sas.parse_argument(['rain', ])
+        args = sas.parse_argument(
+            [
+                'rain',
+            ]
+        )
         search_q = sas.get_search_query(args)
         self.assertTrue(search_q)
-        self.assertEqual(search_q, SearchQuery('rain', [EngineRef('engine1', 'general')],
-                                               'all', 0, 1, None, None, None))
+        self.assertEqual(
+            search_q, SearchQuery('rain', [EngineRef('engine1', 'general')], 'all', 0, 1, None, None, None)
+        )
 
     def test_no_parsed_url(self):
         """test no_parsed_url func"""
-        self.assertEqual(
-            sas.no_parsed_url([{'parsed_url': 'http://example.com'}]),
-            [{}]
-        )
+        self.assertEqual(sas.no_parsed_url([{'parsed_url': 'http://example.com'}]), [{}])
 
-    @params(
-        (datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'),
-        ('a'.encode('utf8'), 'a'),
-        (set([1]), [1])
-    )
+    @params((datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'), ('a'.encode('utf8'), 'a'), (set([1]), [1]))
     def test_json_serial(self, arg, exp_res):
         """test json_serial func"""
         self.assertEqual(sas.json_serial(arg), exp_res)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index bea28c0cc..3b79797e3 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -9,7 +9,6 @@ from tests import SearxTestCase
 
 
 class TestUtils(SearxTestCase):
-
     def test_gen_useragent(self):
         self.assertIsInstance(utils.gen_useragent(), str)
         self.assertIsNotNone(utils.gen_useragent())
@@ -73,6 +72,7 @@ class TestUtils(SearxTestCase):
     def test_extract_url(self):
         def f(html_str, search_url):
             return utils.extract_url(html.fromstring(html_str), search_url)
+
         self.assertEqual(f('<span id="42">https://example.com</span>', 'http://example.com/'), 'https://example.com/')
         self.assertEqual(f('https://example.com', 'http://example.com/'), 'https://example.com/')
         self.assertEqual(f('//example.com', 'http://example.com/'), 'http://example.com/')
@@ -122,14 +122,11 @@ class TestUtils(SearxTestCase):
 
     def test_ecma_unscape(self):
         self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
-        self.assertEqual(utils.ecma_unescape('text using %xx: %F3'),
-                         'text using %xx: ó')
-        self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'),
-                         'text using %u: 吉, 世界')
+        self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')
+        self.assertEqual(utils.ecma_unescape('text using %u: %u5409, %u4E16%u754c'), 'text using %u: 吉, 世界')
 
 
 class TestHTMLTextExtractor(SearxTestCase):
-
     def setUp(self):
         self.html_text_extractor = utils.HTMLTextExtractor()
 
diff --git a/tests/unit/test_webadapter.py b/tests/unit/test_webadapter.py
index 9d8ff5f28..975d846cc 100644
--- a/tests/unit/test_webadapter.py
+++ b/tests/unit/test_webadapter.py
@@ -25,7 +25,6 @@ SEARCHQUERY = [EngineRef(PRIVATE_ENGINE_NAME, 'general')]
 
 
 class ValidateQueryCase(SearxTestCase):
-
     @classmethod
     def setUpClass(cls):
         searx.search.initialize(TEST_ENGINES)
diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py
index 43b631cb8..920a346a5 100644
--- a/tests/unit/test_webapp.py
+++ b/tests/unit/test_webapp.py
@@ -10,11 +10,11 @@ from tests import SearxTestCase
 
 
 class ViewsTestCase(SearxTestCase):
-
     def setUp(self):
         # skip init function (no external HTTP request)
         def dummy(*args, **kwargs):
             pass
+
         self.setattr4test(searx.search.processors, 'initialize_processor', dummy)
 
         from searx import webapp  # pylint disable=import-outside-toplevel
@@ -30,43 +30,39 @@ class ViewsTestCase(SearxTestCase):
                 'url': 'http://first.test.xyz',
                 'engines': ['youtube', 'startpage'],
                 'engine': 'startpage',
-                'parsed_url': ParseResult(scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment=''),  # noqa
-            }, {
+                'parsed_url': ParseResult(
+                    scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment=''
+                ),  # noqa
+            },
+            {
                 'content': 'second test content',
                 'title': 'Second Test',
                 'url': 'http://second.test.xyz',
                 'engines': ['youtube', 'startpage'],
                 'engine': 'youtube',
-                'parsed_url': ParseResult(scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment=''),  # noqa
+                'parsed_url': ParseResult(
+                    scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment=''
+                ),  # noqa
             },
         ]
 
-        timings = [
-            {
-                'engine': 'startpage',
-                'total': 0.8,
-                'load': 0.7
-            },
-            {
-                'engine': 'youtube',
-                'total': 0.9,
-                'load': 0.6
-            }
-        ]
+        timings = [{'engine': 'startpage', 'total': 0.8, 'load': 0.7}, {'engine': 'youtube', 'total': 0.9, 'load': 0.6}]
 
         def search_mock(search_self, *args):
-            search_self.result_container = Mock(get_ordered_results=lambda: test_results,
-                                                answers=dict(),
-                                                corrections=set(),
-                                                suggestions=set(),
-                                                infoboxes=[],
-                                                unresponsive_engines=set(),
-                                                results=test_results,
-                                                results_number=lambda: 3,
-                                                results_length=lambda: len(test_results),
-                                                get_timings=lambda: timings,
-                                                redirect_url=None,
-                                                engine_data={})
+            search_self.result_container = Mock(
+                get_ordered_results=lambda: test_results,
+                answers=dict(),
+                corrections=set(),
+                suggestions=set(),
+                infoboxes=[],
+                unresponsive_engines=set(),
+                results=test_results,
+                results_number=lambda: 3,
+                results_length=lambda: len(test_results),
+                get_timings=lambda: timings,
+                redirect_url=None,
+                engine_data={},
+            )
 
         self.setattr4test(Search, 'search', search_mock)
 
@@ -82,9 +78,12 @@ class ViewsTestCase(SearxTestCase):
     def test_index_empty(self):
         result = self.app.post('/')
         self.assertEqual(result.status_code, 200)
-        self.assertIn(b'<div class="text-hide center-block" id="main-logo">'
-                      + b'<img class="center-block img-responsive" src="/static/themes/oscar/img/searxng.svg"'
-                      + b' alt="searx logo" />SearXNG</div>', result.data)
+        self.assertIn(
+            b'<div class="text-hide center-block" id="main-logo">'
+            + b'<img class="center-block img-responsive" src="/static/themes/oscar/img/searxng.svg"'
+            + b' alt="searx logo" />SearXNG</div>',
+            result.data,
+        )
 
     def test_index_html_post(self):
         result = self.app.post('/', data={'q': 'test'})
@@ -120,11 +119,10 @@ class ViewsTestCase(SearxTestCase):
             b'<h4 class="result_header" id="result-2"><img width="32" height="32" class="favicon"'
             + b' src="/static/themes/oscar/img/icons/youtube.png" alt="youtube" /><a href="http://second.test.xyz"'
             + b' rel="noreferrer" aria-labelledby="result-2">Second <span class="highlight">Test</span></a></h4>',  # noqa
-            result.data
+            result.data,
         )
         self.assertIn(
-            b'<p class="result-content">second <span class="highlight">test</span> content</p>',  # noqa
-            result.data
+            b'<p class="result-content">second <span class="highlight">test</span> content</p>', result.data  # noqa
         )
 
     def test_index_json(self):
@@ -151,7 +149,7 @@ class ViewsTestCase(SearxTestCase):
             b'title,url,content,host,engine,score,type\r\n'
             b'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,,result\r\n'  # noqa
             b'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,,result\r\n',  # noqa
-            result.data
+            result.data,
         )
 
     def test_index_rss(self):
@@ -161,30 +159,15 @@ class ViewsTestCase(SearxTestCase):
 
     def test_search_rss(self):
         result = self.app.post('/search', data={'q': 'test', 'format': 'rss'})
-        self.assertIn(
-            b'<description>Search results for "test" - searx</description>',
-            result.data
-        )
+        self.assertIn(b'<description>Search results for "test" - searx</description>', result.data)
 
-        self.assertIn(
-            b'<opensearch:totalResults>3</opensearch:totalResults>',
-            result.data
-        )
+        self.assertIn(b'<opensearch:totalResults>3</opensearch:totalResults>', result.data)
 
-        self.assertIn(
-            b'<title>First Test</title>',
-            result.data
-        )
+        self.assertIn(b'<title>First Test</title>', result.data)
 
-        self.assertIn(
-            b'<link>http://first.test.xyz</link>',
-            result.data
-        )
+        self.assertIn(b'<link>http://first.test.xyz</link>', result.data)
 
-        self.assertIn(
-            b'<description>first test content</description>',
-            result.data
-        )
+        self.assertIn(b'<description>first test content</description>', result.data)
 
     def test_about(self):
         result = self.app.get('/about')
@@ -199,18 +182,9 @@ class ViewsTestCase(SearxTestCase):
     def test_preferences(self):
         result = self.app.get('/preferences')
         self.assertEqual(result.status_code, 200)
-        self.assertIn(
-            b'<form method="post" action="/preferences" id="search_form">',
-            result.data
-        )
-        self.assertIn(
-            b'<label class="col-sm-3 col-md-2" for="categories">Default categories</label>',
-            result.data
-        )
-        self.assertIn(
-            b'<label class="col-sm-3 col-md-2" for="locale">Interface language</label>',
-            result.data
-        )
+        self.assertIn(b'<form method="post" action="/preferences" id="search_form">', result.data)
+        self.assertIn(b'<label class="col-sm-3 col-md-2" for="categories">Default categories</label>', result.data)
+        self.assertIn(b'<label class="col-sm-3 col-md-2" for="locale">Interface language</label>', result.data)
 
     def test_browser_locale(self):
         result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'})
@@ -218,30 +192,26 @@ class ViewsTestCase(SearxTestCase):
         self.assertIn(
             b'<option value="zh-Hant-TW" selected="selected">',
             result.data,
-            'Interface locale ignored browser preference.'
+            'Interface locale ignored browser preference.',
         )
         self.assertIn(
             b'<option value="zh-Hant-TW" selected="selected">',
             result.data,
-            'Search language ignored browser preference.'
+            'Search language ignored browser preference.',
         )
 
     def test_brower_empty_locale(self):
         result = self.app.get('/preferences', headers={'Accept-Language': ''})
         self.assertEqual(result.status_code, 200)
         self.assertIn(
-            b'<option value="en" selected="selected">',
-            result.data,
-            'Interface locale ignored browser preference.'
+            b'<option value="en" selected="selected">', result.data, 'Interface locale ignored browser preference.'
         )
 
     def test_locale_occitan(self):
         result = self.app.get('/preferences?locale=oc')
         self.assertEqual(result.status_code, 200)
         self.assertIn(
-            b'<option value="oc" selected="selected">',
-            result.data,
-            'Interface locale ignored browser preference.'
+            b'<option value="oc" selected="selected">', result.data, 'Interface locale ignored browser preference.'
        )
 
     def test_stats(self):
diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py
index 6da39a071..8bb3a0447 100644
--- a/tests/unit/test_webutils.py
+++ b/tests/unit/test_webutils.py
@@ -5,12 +5,13 @@ from tests import SearxTestCase
 
 
 class TestWebUtils(SearxTestCase):
-
     def test_prettify_url(self):
-        data = (('https://searx.me/', 'https://searx.me/'),
-                ('https://searx.me/ű', 'https://searx.me/ű'),
-                ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'),
-                ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű'))
+        data = (
+            ('https://searx.me/', 'https://searx.me/'),
+            ('https://searx.me/ű', 'https://searx.me/ű'),
+            ('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'),
+            ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű'),
+        )
         for test_url, expected in data:
             self.assertEqual(webutils.prettify_url(test_url, max_length=32), expected)
 
@@ -21,10 +22,7 @@ class TestWebUtils(SearxTestCase):
         self.assertEqual(webutils.highlight_content('', None), None)
         self.assertEqual(webutils.highlight_content(False, None), None)
 
-        contents = [
-            '<html></html>'
-            'not<'
-        ]
+        contents = ['<html></html>' 'not<']
         for content in contents:
             self.assertEqual(webutils.highlight_content(content, None), content)
 
@@ -35,30 +33,35 @@ class TestWebUtils(SearxTestCase):
             self.assertEqual(webutils.highlight_content(content, query), content)
 
         data = (
-            ('" test "',
-             'a test string',
-             'a <span class="highlight">test</span> string'),
-            ('"a"',
-             'this is a test string',
-             'this is<span class="highlight"> a </span>test string'),
-            ('a test',
-             'this is a test string that matches entire query',
-             'this is <span class="highlight">a test</span> string that matches entire query'),
-            ('this a test',
-             'this is a string to test.',
-             ('<span class="highlight">this</span> is<span class="highlight"> a </span>'
-              'string to <span class="highlight">test</span>.')),
-            ('match this "exact phrase"',
-             'this string contains the exact phrase we want to match',
-             ('<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
-              ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'))
+            ('" test "', 'a test string', 'a <span class="highlight">test</span> string'),
+            ('"a"', 'this is a test string', 'this is<span class="highlight"> a </span>test string'),
+            (
+                'a test',
+                'this is a test string that matches entire query',
+                'this is <span class="highlight">a test</span> string that matches entire query',
+            ),
+            (
+                'this a test',
+                'this is a string to test.',
+                (
+                    '<span class="highlight">this</span> is<span class="highlight"> a </span>'
+                    'string to <span class="highlight">test</span>.'
+                ),
+            ),
+            (
+                'match this "exact phrase"',
+                'this string contains the exact phrase we want to match',
+                (
+                    '<span class="highlight">this</span> string contains the <span class="highlight">exact</span>'
+                    ' <span class="highlight">phrase</span> we want to <span class="highlight">match</span>'
+                ),
+            ),
         )
         for query, content, expected in data:
             self.assertEqual(webutils.highlight_content(content, query), expected)
 
 
 class TestUnicodeWriter(SearxTestCase):
-
     def setUp(self):
         self.unicode_writer = webutils.UnicodeWriter(mock.MagicMock())
 
@@ -74,7 +77,6 @@ class TestUnicodeWriter(SearxTestCase):
 
 
 class TestNewHmac(SearxTestCase):
-
     def test_bytes(self):
         for secret_key in ['secret', b'secret', 1]:
             if secret_key == 1:
@@ -82,6 +84,4 @@ class TestNewHmac(SearxTestCase):
                 webutils.new_hmac(secret_key, b'http://example.com')
                 continue
             res = webutils.new_hmac(secret_key, b'http://example.com')
-            self.assertEqual(
-                res,
-                '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819')
+            self.assertEqual(res, '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819')
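
The hunks above are purely mechanical: black merges backslash continuations onto one line, collapses short literals, and explodes long argument lists one item per line with a trailing comma. A minimal sketch of reproducing one such rewrite through black's Python API follows; it assumes black is installed, and the 120-character line length and disabled string normalization are inferred from the output style in this diff, not stated in it.

    # Illustrative sketch only -- not part of this patch.
    # Assumed settings: line_length=120, string_normalization=False (inferred from the diff).
    import black

    # The pre-patch form of one hunk from tests/unit/test_standalone_searx.py.
    SRC = (
        "with patch.object(sys, 'argv', ['standalone_searx']), \\\n"
        "        self.assertRaises(SystemExit):\n"
        "    sas.parse_argument()\n"
    )

    mode = black.Mode(line_length=120, string_normalization=False)
    # black only parses the source, so undefined names such as `patch` or `sas` are fine here.
    print(black.format_str(SRC, mode=mode))
    # The backslash continuation is merged into a single
    # `with patch.object(...), self.assertRaises(SystemExit):` line, matching the hunk above.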