diff options
author | Alexandre Flament <alex@al-f.net> | 2024-02-16 20:46:18 +0000 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-02-20 10:43:20 +0100 |
commit | ed66ed758dbd1c926296aa227da8a82fff9166e1 (patch) | |
tree | 1e7f818ee5dab0d7c09bad09d3586f0f87231584 /searxng_extra/update | |
parent | 76845ea42c2c3484e30c118f07671b13ade07a29 (diff) | |
download | searxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.tar.gz searxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.zip |
[mod] reduce memory footprint by not calling babel.Locale.parse at runtime
babel.Locale.parse loads more than 60MB in RAM. The only purpose is to get:
LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"]
RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"]
This commit calls babel.Locale.parse when the translations are updated from
weblate, and the results are stored in::
searx/data/locales.json
This file can be built by::
./manage data.locales
By storing these variables in searx.data when the translations are updated we save
roughly 65MB per worker (usually 4 workers = 260MB of RAM saved).
Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra/update')
-rwxr-xr-x | searxng_extra/update/update_engine_traits.py | 6 | ||||
-rw-r--r-- | searxng_extra/update/update_locales.py | 103 |
2 files changed, 107 insertions, 2 deletions
diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py index 46892cc2b..faab198d2 100755 --- a/searxng_extra/update/update_engine_traits.py +++ b/searxng_extra/update/update_engine_traits.py @@ -31,9 +31,11 @@ languages_file_header = """\ # -*- coding: utf-8 -*- '''List of SearXNG's locale codes. -This file is generated automatically by:: +.. hint:: - ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py + Don't modify this file, this file is generated by:: + + ./manage data.traits ''' sxng_locales = ( diff --git a/searxng_extra/update/update_locales.py b/searxng_extra/update/update_locales.py new file mode 100644 index 000000000..e823ebaf1 --- /dev/null +++ b/searxng_extra/update/update_locales.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Update locale names in :origin:`searx/data/locales.json` used by +:ref:`searx.locales` + +- :py:obj:`searx.locales.RTL_LOCALES` +- :py:obj:`searx.locales.LOCALE_NAMES` +""" +from __future__ import annotations + +from typing import Set +import json +from pathlib import Path +import os + +import babel +import babel.languages +import babel.core + +from searx import searx_dir +from searx.locales import ( + ADDITIONAL_TRANSLATIONS, + LOCALE_BEST_MATCH, + get_translation_locales, +) + +LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json' +TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations' + + +def main(): + + LOCALE_NAMES = {} + RTL_LOCALES: Set[str] = set() + + for tag, descr in ADDITIONAL_TRANSLATIONS.items(): + locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-') + LOCALE_NAMES[tag] = descr + if locale.text_direction == 'rtl': + RTL_LOCALES.add(tag) + + for tag in LOCALE_BEST_MATCH: + descr = LOCALE_NAMES.get(tag) + if not descr: + locale = babel.Locale.parse(tag, sep='-') + LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_')) + if locale.text_direction == 'rtl': + RTL_LOCALES.add(tag) + + 
for tr_locale in get_translation_locales(): + sxng_tag = tr_locale.replace('_', '-') + descr = LOCALE_NAMES.get(sxng_tag) + if not descr: + locale = babel.Locale.parse(tr_locale) + LOCALE_NAMES[sxng_tag] = get_locale_descr(locale, tr_locale) + if locale.text_direction == 'rtl': + RTL_LOCALES.add(sxng_tag) + + content = { + "LOCALE_NAMES": LOCALE_NAMES, + "RTL_LOCALES": list(RTL_LOCALES), + } + + with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f: + json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False) + + +def get_locale_descr(locale: babel.Locale, tr_locale): + """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR' + + :param locale: instance of :py:class:`Locale` + :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*) + """ + + native_language, native_territory = _get_locale_descr(locale, tr_locale) + english_language, english_territory = _get_locale_descr(locale, 'en') + + if native_territory == english_territory: + english_territory = None + + if not native_territory and not english_territory: + # none territory name + if native_language == english_language: + return native_language + return native_language + ' (' + english_language + ')' + + else: + result = native_language + ', ' + native_territory + ' (' + english_language + if english_territory: + return result + ', ' + english_territory + ')' + return result + ')' + + +def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]: + language_name = locale.get_language_name(tr_locale).capitalize() # type: ignore + if language_name and ('a' <= language_name[0] <= 'z'): + language_name = language_name.capitalize() + territory_name: str = locale.get_territory_name(tr_locale) # type: ignore + return language_name, territory_name + + +if __name__ == "__main__": + main() |