summary | refs | log | tree | commit | diff
path: root/searxng_extra
diff options
context:
space:
mode:
Diffstat (limited to 'searxng_extra')
-rwxr-xr-x searxng_extra/update/update_languages.py 80
1 files changed, 75 insertions, 5 deletions
diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py
index 754180c47..ebe9c884d 100755
--- a/searxng_extra/update/update_languages.py
+++ b/searxng_extra/update/update_languages.py
@@ -12,12 +12,13 @@ Output files: :origin:`searx/data/engines_languages.json` and
"""
# pylint: disable=invalid-name
-
+from unicodedata import lookup
import json
from pathlib import Path
from pprint import pformat
from babel import Locale, UnknownLocaleError
from babel.languages import get_global
+from babel.core import parse_locale
from searx import settings, searx_dir
from searx.engines import load_engines, engines
@@ -61,6 +62,62 @@ def get_locale(lang_code):
return None
# Hand-picked flag emoji, keyed by a lower-case two-letter code.  Each value
# is a pair of REGIONAL INDICATOR SYMBOL LETTER code points spelling the ISO
# 3166-1 alpha-2 code of the country whose flag should be shown.
lang2emoji = {
    'ha': '\U0001F1F3\U0001F1EA',  # Hausa / Niger (NE)
    'bs': '\U0001F1E7\U0001F1E6',  # Bosnian / Bosnia & Herzegovina (BA)
    'jp': '\U0001F1EF\U0001F1F5',  # Japanese / Japan (JP)
    'ua': '\U0001F1FA\U0001F1E6',  # Ukrainian / Ukraine (UA)
    # Hebrew / Israel (IL) -- the previous value spelled "IR", i.e. the flag
    # of Iran.
    'he': '\U0001F1EE\U0001F1F1',
    'zh': '\U0001F1E8\U0001F1F3',  # Chinese / China (CN)
}
+
+
def get_unicode_flag(lang_code):
    """Return a unicode flag (emoji) that fits ``lang_code``.

    Resolution order:

    1. The lower-cased ``lang_code`` is looked up in ``lang2emoji``.
    2. A two-character code is expanded to ``<code>-<CODE>`` (``en`` becomes
       ``en-GB``) and the result is parsed by ``parse_locale`` with ``-`` as
       separator.  A ``ValueError`` is printed and treated as "nothing
       parsed".
    3. Without a territory, the parsed language is looked up in
       ``lang2emoji`` and that lookup result is returned; a miss is
       additionally reported on stdout.
    4. With a territory, its lower-cased form is looked up in ``lang2emoji``;
       failing that, the first two characters of the territory are mapped to
       *REGIONAL INDICATOR SYMBOL LETTER* code points.  If such a symbol does
       not exist, the problem is printed and ``None`` is returned.
    """
    indicator_prefix = 'REGIONAL INDICATOR SYMBOL LETTER '

    flag = lang2emoji.get(lang_code.lower())
    if flag:
        return flag

    if len(lang_code) == 2:
        # A bare language code has no territory: reuse the code itself as the
        # territory, except for English which is pinned to GB.
        upper = lang_code.upper()
        region = 'GB' if upper == 'EN' else upper
        lang_code = "%s-%s" % (lang_code.lower(), region)

    try:
        language, territory, script, variant = parse_locale(lang_code, '-')
    except ValueError as exc:
        print(exc)
        language = territory = script = variant = ''

    # https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
    if not territory:
        # https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
        flag = lang2emoji.get(language)
        if not flag:
            details = (lang_code, language, territory, script, variant)
            print("%s --> language: %s / territory: %s / script: %s / variant: %s" % details)
        return flag

    flag = lang2emoji.get(territory.lower())
    if flag:
        return flag

    try:
        first = lookup(indicator_prefix + territory[0])
        second = lookup(indicator_prefix + territory[1])
    except KeyError as exc:
        print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
        return None

    return "".join((first, second))
+
+
# Join all language lists.
def join_language_lists(engines_languages):
language_list = {}
@@ -113,7 +170,10 @@ def join_language_lists(engines_languages):
print("ERROR: %s --> %s" % (locale, exc))
locale = None
- language_list[short_code]['countries'][lang_code] = {'country_name': country_name, 'counter': set()}
+ language_list[short_code]['countries'][lang_code] = {
+ 'country_name': country_name,
+ 'counter': set(),
+ }
# count engine for both language_country combination and language alone
language_list[short_code]['counter'].add(engine_name)
@@ -167,7 +227,7 @@ def filter_language_list(all_languages):
# add language without countries too if there's more than one country to choose from
if len(filtered_countries) > 1:
- filtered_countries[lang] = _copy_lang_data(lang)
+ filtered_countries[lang] = _copy_lang_data(lang, None)
elif len(filtered_countries) == 1:
# if there's only one country per language, it's not necessary to show country name
lang_country = next(iter(filtered_countries))
@@ -183,15 +243,22 @@ def filter_language_list(all_languages):
lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
if lang_country:
- filtered_countries[lang_country] = _copy_lang_data(lang)
+ filtered_countries[lang_country] = _copy_lang_data(lang, None)
else:
- filtered_countries[lang] = _copy_lang_data(lang)
+ filtered_countries[lang] = _copy_lang_data(lang, None)
filtered_languages_with_countries.update(filtered_countries)
return filtered_languages_with_countries
class UnicodeEscape(str):
    """Escape unicode string in :py:obj:`pprint.pformat`

    The ``repr()`` of an instance is a single-quoted, pure ASCII Python string
    literal: non-ASCII characters are written as ``\\xNN`` / ``\\uNNNN`` /
    ``\\UNNNNNNNN`` escapes instead of being emitted verbatim.
    """

    def __repr__(self):
        # The 'unicode-escape' codec only ever produces ASCII bytes.
        escaped = self.encode('unicode-escape').decode('ascii')
        # The codec leaves "'" untouched; escape it so that the result stays a
        # valid single-quoted literal.
        return "'" + escaped.replace("'", "\\'") + "'"
+
+
# Write languages.py.
def write_languages_file(languages):
file_headers = (
@@ -209,11 +276,14 @@ def write_languages_file(languages):
if name is None:
print("ERROR: languages['%s'] --> %s" % (code, languages[code]))
continue
+
+ flag = get_unicode_flag(code) or ''
item = (
code,
languages[code]['name'].split(' (')[0],
languages[code].get('country_name') or '',
languages[code].get('english_name') or '',
+ UnicodeEscape(flag),
)
language_codes.append(item)