diff options
-rwxr-xr-x | searxng_extra/update/update_ahmia_blacklist.py | 7 | ||||
-rwxr-xr-x | searxng_extra/update/update_currencies.py | 7 | ||||
-rwxr-xr-x | searxng_extra/update/update_engine_descriptions.py | 7 | ||||
-rwxr-xr-x | searxng_extra/update/update_firefox_version.py | 41 | ||||
-rwxr-xr-x | searxng_extra/update/update_languages.py | 29 |
5 files changed, 51 insertions, 40 deletions
diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py index 57fb78b34..26c485195 100755 --- a/searxng_extra/update/update_ahmia_blacklist.py +++ b/searxng_extra/update/update_ahmia_blacklist.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """This script saves `Ahmia's blacklist`_ for onion sites. @@ -21,9 +22,7 @@ def fetch_ahmia_blacklist(): resp = requests.get(URL, timeout=3.0) if resp.status_code != 200: raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) - else: - blacklist = resp.text.split() - return blacklist + return resp.text.split() def get_ahmia_blacklist_filename(): @@ -32,5 +31,5 @@ def get_ahmia_blacklist_filename(): if __name__ == '__main__': blacklist = fetch_ahmia_blacklist() - with open(get_ahmia_blacklist_filename(), "w") as f: + with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: f.write('\n'.join(blacklist)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index cdff4cbc9..e51692e72 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """Fetch currencies from :origin:`searx/engines/wikidata.py` engine. @@ -7,13 +8,15 @@ Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ... <.github/workflows/data-update.yml>`). """ + +# pylint: disable=invalid-name + import re import unicodedata import json # set path -from sys import path -from os.path import realpath, dirname, join +from os.path import join from searx import searx_dir from searx.locales import LOCALE_NAMES diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index bab1a0349..5b73fd396 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """Fetch website description from websites and from @@ -8,6 +9,8 @@ Output file: :origin:`searx/data/engine_descriptions.json`. """ +# pylint: disable=invalid-name, global-statement + import json from urllib.parse import urlparse from os.path import join @@ -109,7 +112,7 @@ def get_wikipedia_summary(lang, pageid): response.raise_for_status() api_result = json.loads(response.text) return api_result.get('extract') - except: + except Exception: # pylint: disable=broad-except return None @@ -141,7 +144,7 @@ def get_website_description(url, lang1, lang2=None): try: response = searx.network.get(url, headers=headers, timeout=10) response.raise_for_status() - except Exception: + except Exception: # pylint: disable=broad-except return (None, None) try: diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 163982b16..a447f9fd5 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """Fetch firefox useragent signatures @@ -9,20 +10,21 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... """ import json -import requests import re -from os.path import dirname, join +from os.path import join from urllib.parse import urlparse, urljoin -from distutils.version import LooseVersion, StrictVersion +from distutils.version import LooseVersion + +import requests from lxml import html from searx import searx_dir URL = 'https://ftp.mozilla.org/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/' -NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$') -# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$') -# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$') +NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$') +# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$') +# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$') # useragents = { @@ -39,20 +41,19 @@ def fetch_firefox_versions(): resp = requests.get(URL, timeout=2.0) if resp.status_code != 200: raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) - else: - dom = html.fromstring(resp.text) - versions = [] - - for link in dom.xpath('//a/@href'): - url = urlparse(urljoin(URL, link)) - path = url.path - if path.startswith(RELEASE_PATH): - version = path[len(RELEASE_PATH) : -1] - if NORMAL_REGEX.match(version): - versions.append(LooseVersion(version)) - - list.sort(versions, reverse=True) - return versions + dom = html.fromstring(resp.text) + versions = [] + + for link in dom.xpath('//a/@href'): + url = urlparse(urljoin(URL, link)) + path = url.path + if path.startswith(RELEASE_PATH): + version = path[len(RELEASE_PATH) : -1] + if NORMAL_REGEX.match(version): + versions.append(LooseVersion(version)) + + list.sort(versions, reverse=True) + return versions def fetch_firefox_last_versions(): diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py index 9a71566a9..754180c47 100755 --- a/searxng_extra/update/update_languages.py +++ b/searxng_extra/update/update_languages.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# lint: pylint + # SPDX-License-Identifier: AGPL-3.0-or-later """This script generates languages.py from intersecting each engine's supported languages. @@ -9,6 +11,8 @@ Output files: :origin:`searx/data/engines_languages.json` and """ +# pylint: disable=invalid-name + import json from pathlib import Path from pprint import pformat @@ -28,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py' def fetch_supported_languages(): set_timeout_for_thread(10.0) - engines_languages = dict() + engines_languages = {} names = list(engines) names.sort() @@ -36,7 +40,7 @@ def fetch_supported_languages(): if hasattr(engines[engine_name], 'fetch_supported_languages'): engines_languages[engine_name] = engines[engine_name].fetch_supported_languages() print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name)) - if type(engines_languages[engine_name]) == list: + if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck engines_languages[engine_name] = sorted(engines_languages[engine_name]) print("fetched languages from %s engines" % len(engines_languages)) @@ -59,7 +63,7 @@ def get_locale(lang_code): # Join all language lists. def join_language_lists(engines_languages): - language_list = dict() + language_list = {} for engine_name in engines_languages: for lang_code in engines_languages[engine_name]: @@ -95,7 +99,7 @@ def join_language_lists(engines_languages): 'name': language_name, 'english_name': english_name, 'counter': set(), - 'countries': dict(), + 'countries': {}, } # add language with country if not in list @@ -123,6 +127,7 @@ def join_language_lists(engines_languages): def filter_language_list(all_languages): min_engines_per_lang = 13 min_engines_per_country = 7 + # pylint: disable=consider-using-dict-items, consider-iterating-dictionary main_engines = [ engine_name for engine_name in engines.keys() @@ -142,7 +147,7 @@ def filter_language_list(all_languages): } def _copy_lang_data(lang, country_name=None): - new_dict = dict() + new_dict = {} new_dict['name'] = all_languages[lang]['name'] new_dict['english_name'] = all_languages[lang]['english_name'] if country_name: @@ -150,10 +155,10 @@ def filter_language_list(all_languages): return new_dict # for each language get country codes supported by most engines or at least one country code - filtered_languages_with_countries = dict() + filtered_languages_with_countries = {} for lang, lang_data in filtered_languages.items(): countries = lang_data['countries'] - filtered_countries = dict() + filtered_countries = {} # get language's country codes with enough supported engines for lang_country, country_data in countries.items(): @@ -215,7 +220,7 @@ def write_languages_file(languages): language_codes = tuple(language_codes) - with open(languages_file, 'w') as new_file: + with open(languages_file, 'w', encoding='utf-8') as new_file: file_content = "{file_headers} {language_codes},\n)\n".format( # fmt: off file_headers = '\n'.join(file_headers), @@ -228,7 +233,7 @@ def write_languages_file(languages): if __name__ == "__main__": load_engines(settings['engines']) - engines_languages = fetch_supported_languages() - all_languages = join_language_lists(engines_languages) - filtered_languages = filter_language_list(all_languages) - write_languages_file(filtered_languages) + _engines_languages = fetch_supported_languages() + _all_languages = join_language_lists(_engines_languages) + _filtered_languages = filter_language_list(_all_languages) + write_languages_file(_filtered_languages) |