diff options
author | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-02-28 22:30:48 -0600 |
---|---|---|
committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-03-27 00:08:03 -0600 |
commit | 772c048d01c7585fd60afca1ce30a1914e6e5b4a (patch) | |
tree | 96a5662897df2bcf0ab53456e0a67ace998f2169 /searx/utils.py | |
parent | d1eae9359f8c5920632a730744ea2208070f06da (diff) | |
download | searxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.tar.gz searxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.zip |
refactor engine's search language handling
Add match_language function in utils to match any user given
language code with a list of engine's supported languages.
Also add language_aliases dict on each engine to translate
standard language codes into the custom codes used by the engine.
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 61 |
1 files changed, 61 insertions, 0 deletions
def _match_language(lang_code, lang_list=None, custom_aliases=None):
    """Auxiliary function to match ``lang_code`` against ``lang_list``.

    The lookup is attempted in this order:

    1. ``lang_code`` itself, after translating it through
       ``custom_aliases`` when it has an entry there.
    2. The code built from Babel's ``likely_subtags`` entry for
       ``lang_code`` (also translated through ``custom_aliases``).
    3. The first entry of ``lang_list`` whose part before the first ``-``
       equals ``lang_code``.

    :param lang_code: language code to look for.
    :param lang_list: collection of codes supported by the engine;
        ``None`` is treated as empty.
    :param custom_aliases: dict translating standard codes into the custom
        codes used by the engine; ``None`` is treated as empty.
    :returns: the matching entry of ``lang_list``, or ``None``.
    """
    # None sentinels instead of mutable defaults, so that no list/dict
    # object is shared between calls
    lang_list = [] if lang_list is None else lang_list
    custom_aliases = {} if custom_aliases is None else custom_aliases

    # replace language code with a custom alias if necessary
    lang_code = custom_aliases.get(lang_code, lang_code)

    if lang_code in lang_list:
        return lang_code

    # try to get the most likely country for this language
    subtags = get_global('likely_subtags').get(lang_code)
    if subtags:
        # keep only the first and the last '_'-separated subtag
        subtag_parts = subtags.split('_')
        new_code = subtag_parts[0] + '-' + subtag_parts[-1]
        new_code = custom_aliases.get(new_code, new_code)
        if new_code in lang_list:
            return new_code

    # try to get any supported country for this language
    for supported in lang_list:
        if lang_code == supported.split('-')[0]:
            return supported

    return None


def match_language(locale_code, lang_list=None, custom_aliases=None,
                   fallback='en-US'):
    """Get the language code from ``lang_list`` that best matches
    ``locale_code``.

    Candidates are tried in this order, each through ``_match_language``:
    the given ``locale_code``; the same language with the equivalent
    country code from Babel's ``territory_aliases``; the equivalent
    language code from Babel's ``language_aliases``; and finally the bare
    language part of ``locale_code``.

    :param locale_code: user given code, e.g. ``'fr'`` or ``'fr-FR'``.
    :param lang_list: collection of codes supported by the engine;
        ``None`` is treated as empty.
    :param custom_aliases: dict translating standard codes into the custom
        codes used by the engine; ``None`` is treated as empty.
    :param fallback: value returned when nothing matches.
    :returns: the matching entry of ``lang_list``, or ``fallback``.
    """
    # try to get language from given locale_code
    language = _match_language(locale_code, lang_list, custom_aliases)
    if language:
        return language

    locale_parts = locale_code.split('-')
    lang_code = locale_parts[0]

    # try to get language using an equivalent country code
    if len(locale_parts) > 1:
        country_alias = get_global('territory_aliases').get(locale_parts[-1])
        if country_alias:
            # only the first listed replacement territory is tried
            language = _match_language(lang_code + '-' + country_alias[0],
                                       lang_list, custom_aliases)
            if language:
                return language

    # try to get language using an equivalent language code
    alias = get_global('language_aliases').get(lang_code)
    if alias:
        language = _match_language(alias, lang_list, custom_aliases)
        if language:
            return language

    if lang_code != locale_code:
        # try to get language from given language without giving the country
        language = _match_language(lang_code, lang_list, custom_aliases)

    return language or fallback