diff options
author | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2021-10-04 22:44:58 -0700 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2021-10-12 21:06:20 +0200 |
commit | 66b7be09656798b745f95ff0ce7669db014b3b38 (patch) | |
tree | 06b86307edb3b5c73931c29956b954ad5bf118d4 /searx/utils.py | |
parent | 4cc1ee8565e22442ae92ed5f2015749d684e6087 (diff) | |
download | searxng-66b7be09656798b745f95ff0ce7669db014b3b38.tar.gz searxng-66b7be09656798b745f95ff0ce7669db014b3b38.zip |
[fix] fix match_language issue to make zh-TW match to zh-Hant-TW
pybabel separates locales with underscores but we use hyphens
everywhere babel doesn't directly touch
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 26 |
1 files changed, 22 insertions, 4 deletions
```diff
diff --git a/searx/utils.py b/searx/utils.py
index 4c2c9e429..163892e93 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -369,6 +369,16 @@ def _get_lang_to_lc_dict(lang_list):
     return value
 
 
+# babel's get_global contains all sorts of miscellaneous locale and territory related data
+# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py
+def _get_from_babel(lang_code, key):
+    match = get_global(key).get(lang_code.replace('-', '_'))
+    # for some keys, such as territory_aliases, match may be a list
+    if isinstance(match, str):
+        return match.replace('_', '-')
+    return match
+
+
 def _match_language(lang_code, lang_list=[], custom_aliases={}): # pylint: disable=W0102
     """auxiliary function to match lang_code in lang_list"""
     # replace language code with a custom alias if necessary
@@ -379,9 +389,11 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}): # pylint: disa
         return lang_code
 
     # try to get the most likely country for this language
-    subtags = get_global('likely_subtags').get(lang_code)
+    subtags = _get_from_babel(lang_code, 'likely_subtags')
     if subtags:
-        subtag_parts = subtags.split('_')
+        if subtags in lang_list:
+            return subtags
+        subtag_parts = subtags.split('-')
         new_code = subtag_parts[0] + '-' + subtag_parts[-1]
         if new_code in custom_aliases:
             new_code = custom_aliases[new_code]
@@ -402,16 +414,22 @@ def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US
     locale_parts = locale_code.split('-')
     lang_code = locale_parts[0]
 
+    # if locale_code has script, try matching without it
+    if len(locale_parts) > 2:
+        language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases)
+        if language:
+            return language
+
     # try to get language using an equivalent country code
     if len(locale_parts) > 1:
-        country_alias = get_global('territory_aliases').get(locale_parts[-1])
+        country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases')
         if country_alias:
             language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
             if language:
                 return language
 
     # try to get language using an equivalent language code
-    alias = get_global('language_aliases').get(lang_code)
+    alias = _get_from_babel(lang_code, 'language_aliases')
     if alias:
         language = _match_language(alias, lang_list, custom_aliases)
         if language:
```