diff options
author | Alexandre Flament <alex@al-f.net> | 2021-10-12 21:50:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-10-12 21:50:05 +0200 |
commit | ee5e9f9e1d488d8d24086b8111ac6ecff4abd77a (patch) | |
tree | 3e42dc89cfbeb1e8b33a5d30dcbfd73713e860de | |
parent | b86aacdad65f8c18f01392a1a18d021006e55b21 (diff) | |
parent | 4759f65b69724589dcf1aafd04f7d7902952de98 (diff) | |
download | searxng-ee5e9f9e1d488d8d24086b8111ac6ecff4abd77a.tar.gz searxng-ee5e9f9e1d488d8d24086b8111ac6ecff4abd77a.zip |
Merge pull request #380 from dalf/fix-339
[fix] interface language zh_CN and zh_TW don't work
-rw-r--r-- | searx/locales.py | 34 | ||||
-rw-r--r-- | searx/templates/oscar/base.html | 2 | ||||
-rw-r--r-- | searx/templates/simple/base.html | 2 | ||||
-rw-r--r-- | searx/translations/zh_Hans_CN/LC_MESSAGES/messages.mo (renamed from searx/translations/zh_CN/LC_MESSAGES/messages.mo) | bin | 10091 -> 10091 bytes | |||
-rw-r--r-- | searx/translations/zh_Hans_CN/LC_MESSAGES/messages.po (renamed from searx/translations/zh_CN/LC_MESSAGES/messages.po) | 0 | ||||
-rw-r--r-- | searx/translations/zh_Hant_TW/LC_MESSAGES/messages.mo (renamed from searx/translations/zh_TW/LC_MESSAGES/messages.mo) | bin | 10065 -> 10065 bytes | |||
-rw-r--r-- | searx/translations/zh_Hant_TW/LC_MESSAGES/messages.po (renamed from searx/translations/zh_TW/LC_MESSAGES/messages.po) | 0 | ||||
-rw-r--r-- | searx/utils.py | 26 | ||||
-rwxr-xr-x | searx/webapp.py | 23 | ||||
-rw-r--r-- | tests/unit/test_utils.py | 8 | ||||
-rw-r--r-- | tests/unit/test_webapp.py | 4 |
11 files changed, 68 insertions, 31 deletions
diff --git a/searx/locales.py b/searx/locales.py index 42d5854c2..b791f35f3 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`UI_LOCALE_CODES` and -:py:obj:`RTL_LOCALES`.""" +"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. +""" -from typing import List, Set +from typing import Set import os import pathlib @@ -12,17 +12,14 @@ from babel import Locale LOCALE_NAMES = { "oc": "Occitan", - "nl_BE": "Vlaams (Dutch, Belgium)", + "nl-BE": "Vlaams (Dutch, Belgium)", } -"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt_BR' -(delimiter is *underline* '_')""" - -UI_LOCALE_CODES: List[str] = [] -"""List of locales e.g. 'fr' or 'pt-BR' (delimiter is '-')""" +"""Mapping of locales and their description. Locales e.g. 'fr' or 'pt-BR' +(delimiter is *underline* '-')""" RTL_LOCALES: Set[str] = set() -"""List of *Right-To-Left* locales e.g. 'he' or 'fa_IR' (delimiter is -*underline* '_')""" +"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (delimiter is +*underline* '-')""" def _get_name(locale, language_code): @@ -37,7 +34,7 @@ def _get_locale_name(locale, locale_name): """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR' :param locale: instance of :py:class:`Locale` - :param locale_name: name e.g. 'fr' or 'pt_BR' + :param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*) """ native_language, native_territory = _get_name(locale, locale_name) english_language, english_territory = _get_name(locale, 'en') @@ -54,22 +51,19 @@ def _get_locale_name(locale, locale_name): def initialize_locales(directory): - """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`UI_LOCALE_CODES` and - :py:obj:`RTL_LOCALES`. + """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. 
""" - global UI_LOCALE_CODES # pylint: disable=global-statement for dirname in sorted(os.listdir(directory)): # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations if not os.path.isdir( os.path.join(directory, dirname, 'LC_MESSAGES') ): continue - info = LOCALE_NAMES.get(dirname) + locale_name = dirname.replace('_', '-') + info = LOCALE_NAMES.get(locale_name) if not info: locale = Locale.parse(dirname) - LOCALE_NAMES[dirname] = _get_locale_name(locale, dirname) + LOCALE_NAMES[locale_name] = _get_locale_name(locale, dirname) if locale.text_direction == 'rtl': - RTL_LOCALES.add(dirname) - - UI_LOCALE_CODES = [l.replace('_', '-') for l in LOCALE_NAMES] + RTL_LOCALES.add(locale_name) initialize_locales(pathlib.Path(__file__).parent / 'translations') diff --git a/searx/templates/oscar/base.html b/searx/templates/oscar/base.html index 01dedc782..de7d05bf6 100644 --- a/searx/templates/oscar/base.html +++ b/searx/templates/oscar/base.html @@ -1,6 +1,6 @@ {% from 'oscar/macros.html' import icon %} <!DOCTYPE html> -<html lang="{{ preferences.get_value('locale') }}" xml:lang="{{ preferences.get_value('locale') }}"{% if rtl %} dir="rtl"{% endif %} class="nojs"> +<html lang="{{ locale_rfc5646 }}" xml:lang="{{ locale_rfc5646 }}"{% if rtl %} dir="rtl"{% endif %} class="nojs"> <head> <meta charset="UTF-8" /> <meta name="description" content="SearXNG - a privacy-respecting, hackable metasearch engine" /> diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html index ac6a09822..108d2fcf5 100644 --- a/searx/templates/simple/base.html +++ b/searx/templates/simple/base.html @@ -1,5 +1,5 @@ <!DOCTYPE html> -<html class="no-js" lang="en" {% if rtl %} dir="rtl"{% endif %}> +<html class="no-js" lang="{{ locale_rfc5646 }}" {% if rtl %} dir="rtl"{% endif %}> <head> <meta charset="UTF-8" /> <meta name="description" content="SearXNG — a privacy-respecting, hackable metasearch engine"> diff --git 
a/searx/translations/zh_CN/LC_MESSAGES/messages.mo b/searx/translations/zh_Hans_CN/LC_MESSAGES/messages.mo Binary files differ index 3c59a1800..3c59a1800 100644 --- a/searx/translations/zh_CN/LC_MESSAGES/messages.mo +++ b/searx/translations/zh_Hans_CN/LC_MESSAGES/messages.mo diff --git a/searx/translations/zh_CN/LC_MESSAGES/messages.po b/searx/translations/zh_Hans_CN/LC_MESSAGES/messages.po index 1e7f35f49..1e7f35f49 100644 --- a/searx/translations/zh_CN/LC_MESSAGES/messages.po +++ b/searx/translations/zh_Hans_CN/LC_MESSAGES/messages.po diff --git a/searx/translations/zh_TW/LC_MESSAGES/messages.mo b/searx/translations/zh_Hant_TW/LC_MESSAGES/messages.mo Binary files differ index 5535a29b3..5535a29b3 100644 --- a/searx/translations/zh_TW/LC_MESSAGES/messages.mo +++ b/searx/translations/zh_Hant_TW/LC_MESSAGES/messages.mo diff --git a/searx/translations/zh_TW/LC_MESSAGES/messages.po b/searx/translations/zh_Hant_TW/LC_MESSAGES/messages.po index e604fe981..e604fe981 100644 --- a/searx/translations/zh_TW/LC_MESSAGES/messages.po +++ b/searx/translations/zh_Hant_TW/LC_MESSAGES/messages.po diff --git a/searx/utils.py b/searx/utils.py index 4c2c9e429..163892e93 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -369,6 +369,16 @@ def _get_lang_to_lc_dict(lang_list): return value +# babel's get_global contains all sorts of miscellaneous locale and territory related data +# see get_global in: https://github.com/python-babel/babel/blob/master/babel/core.py +def _get_from_babel(lang_code, key): + match = get_global(key).get(lang_code.replace('-', '_')) + # for some keys, such as territory_aliases, match may be a list + if isinstance(match, str): + return match.replace('_', '-') + return match + + def _match_language(lang_code, lang_list=[], custom_aliases={}): # pylint: disable=W0102 """auxiliary function to match lang_code in lang_list""" # replace language code with a custom alias if necessary @@ -379,9 +389,11 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}): # 
pylint: disa return lang_code # try to get the most likely country for this language - subtags = get_global('likely_subtags').get(lang_code) + subtags = _get_from_babel(lang_code, 'likely_subtags') if subtags: - subtag_parts = subtags.split('_') + if subtags in lang_list: + return subtags + subtag_parts = subtags.split('-') new_code = subtag_parts[0] + '-' + subtag_parts[-1] if new_code in custom_aliases: new_code = custom_aliases[new_code] @@ -402,16 +414,22 @@ def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US locale_parts = locale_code.split('-') lang_code = locale_parts[0] + # if locale_code has script, try matching without it + if len(locale_parts) > 2: + language = _match_language(lang_code + '-' + locale_parts[-1], lang_list, custom_aliases) + if language: + return language + # try to get language using an equivalent country code if len(locale_parts) > 1: - country_alias = get_global('territory_aliases').get(locale_parts[-1]) + country_alias = _get_from_babel(locale_parts[-1], 'territory_aliases') if country_alias: language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases) if language: return language # try to get language using an equivalent language code - alias = get_global('language_aliases').get(lang_code) + alias = _get_from_babel(lang_code, 'language_aliases') if alias: language = _match_language(alias, lang_list, custom_aliases) if language: diff --git a/searx/webapp.py b/searx/webapp.py index ab9c96712..e8fb92565 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -109,7 +109,7 @@ from searx.flaskfix import patch_application from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx.languages import language_codes as languages -from searx.locales import LOCALE_NAMES, UI_LOCALE_CODES, RTL_LOCALES +from searx.locales import LOCALE_NAMES, RTL_LOCALES from searx.search import SearchWithPlugins, initialize as search_initialize from searx.network import 
stream as http_stream, set_context_network_name from searx.search.checker import get_result as checker_get_result @@ -223,6 +223,12 @@ def get_locale(): if locale == 'oc': request.form['use-translation'] = 'oc' locale = 'fr_FR' + if locale == '': + # if there is an error loading the preferences + # the locale is going to be '' + locale = 'en' + # babel uses underscore instead of hyphen. + locale = locale.replace('-', '_') logger.debug("%s uses locale `%s`", urllib.parse.quote(request.url), locale) return locale @@ -240,6 +246,16 @@ def _get_browser_language(req, lang_list): return 'en' +def _get_locale_rfc5646(locale): + """Get locale name for <html lang="..."> + Chrom* browsers don't detect the language when there is a subtag (ie a territory). + For example "zh-TW" is detected but not "zh-Hant-TW". + This function returns a locale without the subtag. + """ + parts = locale.split('-') + return parts[0].lower() + '-' + parts[-1].upper() + + # code-highlighter @app.template_filter('code_highlighter') def code_highlighter(codelines, language=None): @@ -420,6 +436,8 @@ def render(template_name, override_theme=None, **kwargs): kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) locale = request.preferences.get_value('locale') + kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale) + if locale in RTL_LOCALES and 'rtl' not in kwargs: kwargs['rtl'] = True if 'current_language' not in kwargs: @@ -512,8 +530,7 @@ def pre_request(): # locale is defined neither in settings nor in preferences # use browser headers if not preferences.get_value("locale"): - locale = _get_browser_language(request, UI_LOCALE_CODES) - locale = locale.replace('-', '_') + locale = _get_browser_language(request, LOCALE_NAMES.keys()) preferences.parse_dict({"locale": locale}) # request.user_plugins diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index e9c247382..bea28c0cc 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -92,6 +92,14 @@ 
class TestUtils(SearxTestCase): self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') + # handle script tags + self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN') + self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW') + self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN') + self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW') + self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN') + self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW') + aliases = {'en-GB': 'en-UK', 'he': 'iw'} # guess country diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index c2d57f80c..ff58aa1e9 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -211,12 +211,12 @@ class ViewsTestCase(SearxTestCase): result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) self.assertEqual(result.status_code, 200) self.assertIn( - b'<option value="zh_TW" selected="selected">', + b'<option value="zh-Hant-TW" selected="selected">', result.data, 'Interface locale ignored browser preference.' ) self.assertIn( - b'<option value="zh-TW" selected="selected">', + b'<option value="zh-Hant-TW" selected="selected">', result.data, 'Search language ignored browser preference.' ) |