summaryrefslogtreecommitdiff
path: root/searxng_extra/update
diff options
context:
space:
mode:
authorAlexandre Flament <alex@al-f.net>2024-02-16 20:46:18 +0000
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-02-20 10:43:20 +0100
commited66ed758dbd1c926296aa227da8a82fff9166e1 (patch)
tree1e7f818ee5dab0d7c09bad09d3586f0f87231584 /searxng_extra/update
parent76845ea42c2c3484e30c118f07671b13ade07a29 (diff)
downloadsearxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.tar.gz
searxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.zip
[mod] reduce memory footprint by not calling babel.Locale.parse at runtime
babel.Locale.parse loads more than 60MB in RAM. The only purpose is to get: LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"] RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"] This commit calls babel.Locale.parse when the translations are updated from weblate and stores the result in:: searx/data/locales.json This file can be built by:: ./manage data.locales By storing these variables in searx.data when the translations are updated we save roughly 65MB per worker (usually 4 workers = 260MB of RAM saved). Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494 Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra/update')
-rwxr-xr-xsearxng_extra/update/update_engine_traits.py6
-rw-r--r--searxng_extra/update/update_locales.py103
2 files changed, 107 insertions, 2 deletions
diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py
index 46892cc2b..faab198d2 100755
--- a/searxng_extra/update/update_engine_traits.py
+++ b/searxng_extra/update/update_engine_traits.py
@@ -31,9 +31,11 @@ languages_file_header = """\
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
-This file is generated automatically by::
+.. hint::
- ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+ Don't modify this file, this file is generated by::
+
+ ./manage data.traits
'''
sxng_locales = (
diff --git a/searxng_extra/update/update_locales.py b/searxng_extra/update/update_locales.py
new file mode 100644
index 000000000..e823ebaf1
--- /dev/null
+++ b/searxng_extra/update/update_locales.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Update locale names in :origin:`searx/data/locales.json` used by
+:ref:`searx.locales`
+
+- :py:obj:`searx.locales.RTL_LOCALES`
+- :py:obj:`searx.locales.LOCALE_NAMES`
+"""
+from __future__ import annotations
+
+from typing import Set
+import json
+from pathlib import Path
+import os
+
+import babel
+import babel.languages
+import babel.core
+
+from searx import searx_dir
+from searx.locales import (
+ ADDITIONAL_TRANSLATIONS,
+ LOCALE_BEST_MATCH,
+ get_translation_locales,
+)
+
# JSON file written by main(); it holds the keys "LOCALE_NAMES" and "RTL_LOCALES".
LOCALE_DATA_FILE = Path(searx_dir).joinpath('data', 'locales.json')
# NOTE(review): the identifier is misspelled ("TRANSLATOINS") and is not
# referenced anywhere in this module -- confirm there are no external users
# before renaming or removing it.
TRANSLATOINS_FOLDER = Path(searx_dir).joinpath('translations')
+
+
def main():
    """Regenerate the locale data file ``LOCALE_DATA_FILE``.

    Collects a display name for every SearXNG locale tag and the set of tags
    whose text direction is right-to-left, then writes both as JSON.  A tag
    gets its name from the first source that provides one:

    1. ``ADDITIONAL_TRANSLATIONS`` (description taken verbatim),
    2. the tags listed in ``LOCALE_BEST_MATCH``,
    3. the locales returned by ``get_translation_locales()``.
    """

    locale_names = {}
    rtl_locales: Set[str] = set()

    # The description comes straight from ADDITIONAL_TRANSLATIONS; babel is
    # only asked (via the best-match locale) for the text direction.
    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
        locale_names[tag] = descr
        if locale.text_direction == 'rtl':
            rtl_locales.add(tag)

    for tag in LOCALE_BEST_MATCH:
        if locale_names.get(tag):
            # already described by an earlier source
            continue
        locale = babel.Locale.parse(tag, sep='-')
        locale_names[tag] = get_locale_descr(locale, tag.replace('-', '_'))
        if locale.text_direction == 'rtl':
            rtl_locales.add(tag)

    # Translation locale names use '_' as delimiter, SearXNG tags use '-'.
    for tr_locale in get_translation_locales():
        sxng_tag = tr_locale.replace('_', '-')
        if locale_names.get(sxng_tag):
            continue
        locale = babel.Locale.parse(tr_locale)
        locale_names[sxng_tag] = get_locale_descr(locale, tr_locale)
        if locale.text_direction == 'rtl':
            rtl_locales.add(sxng_tag)

    content = {
        "LOCALE_NAMES": locale_names,
        # A set has no stable iteration order (string hashes are randomized
        # per process) and sort_keys only orders dict keys, so sort the list
        # explicitly to keep the generated file reproducible.
        "RTL_LOCALES": sorted(rtl_locales),
    }

    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
+
+
def get_locale_descr(locale: babel.Locale, tr_locale):
    """Build the display name of a locale: native name followed by the
    English name in parentheses.

    Shape of the result (the names themselves come from babel):

    - ``Français (French)`` -- language only
    - ``English`` -- native and English language name are identical
    - ``Português, Brasil (Portuguese, Brazil)`` -- language and territory
    - ``Français, Canada (French)`` -- the English territory is omitted when
      it equals the native one

    A territory name that is missing on the native side is left out instead
    of being concatenated (which raised a ``TypeError`` for ``None``).

    :param locale: instance of :py:class:`Locale`
    :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
    :return: the description string
    """

    native_language, native_territory = _get_locale_descr(locale, tr_locale)
    english_language, english_territory = _get_locale_descr(locale, 'en')

    # Do not repeat a territory name that reads the same in both languages.
    if native_territory == english_territory:
        english_territory = None

    if not native_territory and not english_territory:
        # no territory name at all
        if native_language == english_language:
            return native_language
        return f'{native_language} ({english_language})'

    native = native_language
    if native_territory:
        native += f', {native_territory}'
    english = english_language
    if english_territory:
        english += f', {english_territory}'
    return f'{native} ({english})'


def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str | None]:
    """Return ``(language_name, territory_name)`` of *locale*, both expressed
    in the language of *tr_locale*.

    The language name is passed through :py:meth:`str.capitalize`; the
    territory name is returned exactly as ``locale.get_territory_name``
    delivers it, which may be ``None``.
    """
    # NOTE(review): get_language_name() presumably returns None for a language
    # babel has no name for; that case still fails on .capitalize() -- confirm
    # whether a fallback is wanted.
    language_name = locale.get_language_name(tr_locale).capitalize()  # type: ignore
    territory_name = locale.get_territory_name(tr_locale)
    return language_name, territory_name
+
+
# Script entry point: rebuild the locale data file when executed directly.
if __name__ == '__main__':
    main()