summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexandre Flament <alex@al-f.net>2024-02-16 20:46:18 +0000
committerMarkus Heiser <markus.heiser@darmarIT.de>2024-02-20 10:43:20 +0100
commited66ed758dbd1c926296aa227da8a82fff9166e1 (patch)
tree1e7f818ee5dab0d7c09bad09d3586f0f87231584
parent76845ea42c2c3484e30c118f07671b13ade07a29 (diff)
downloadsearxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.tar.gz
searxng-ed66ed758dbd1c926296aa227da8a82fff9166e1.zip
[mod] reduce memory footprint by not calling babel.Locale.parse at runtime
babel.Locale.parse loads more than 60MB into RAM. Its only purpose is to build: LOCALE_NAMES - searx.data.LOCALES["LOCALE_NAMES"] RTL_LOCALES - searx.data.LOCALES["RTL_LOCALES"] This commit calls babel.Locale.parse only when the translations are updated from Weblate, and the result is stored in:: searx/data/locales.json This file can be built by:: ./manage data.locales By storing these variables in searx.data when the translations are updated we save roughly 65MB per worker (usually 4 workers = 260MB of RAM saved). Suggested-by: https://github.com/searxng/searxng/discussions/2633#discussioncomment-8490494 Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r--docs/dev/searxng_extra/update.rst10
-rw-r--r--docs/src/searx.locales.rst7
-rw-r--r--searx/data/__init__.py2
-rw-r--r--searx/data/locales.json69
-rw-r--r--searx/locales.py125
-rw-r--r--searx/sxng_locales.py6
-rwxr-xr-xsearxng_extra/update/update_engine_traits.py6
-rw-r--r--searxng_extra/update/update_locales.py103
-rwxr-xr-xutils/lib_sxng_data.sh11
-rwxr-xr-xutils/lib_sxng_weblate.sh5
10 files changed, 267 insertions, 77 deletions
diff --git a/docs/dev/searxng_extra/update.rst b/docs/dev/searxng_extra/update.rst
index a125303e0..dc3b06744 100644
--- a/docs/dev/searxng_extra/update.rst
+++ b/docs/dev/searxng_extra/update.rst
@@ -78,6 +78,16 @@ Scripts to update static data in :origin:`searx/data/`
.. automodule:: searxng_extra.update.update_pygments
:members:
+.. _update_locales.py:
+
+``update_locales.py``
+=====================
+
+:origin:`[source] <searxng_extra/update/update_locales.py>`
+
+.. automodule:: searxng_extra.update.update_locales
+ :members:
+
``update_wikidata_units.py``
============================
diff --git a/docs/src/searx.locales.rst b/docs/src/searx.locales.rst
index 0de49a5e1..9882e7890 100644
--- a/docs/src/searx.locales.rst
+++ b/docs/src/searx.locales.rst
@@ -10,11 +10,6 @@ Locales
:backlinks: entry
.. automodule:: searx.locales
- :members:
+ :members:
-SearXNG's locale codes
-======================
-
-.. automodule:: searx.sxng_locales
- :members:
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 0822f4ac8..c79d1042f 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -15,6 +15,7 @@ __all__ = [
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ENGINE_DESCRIPTIONS',
+ 'LOCALES',
'ahmia_blacklist_loader',
]
@@ -50,3 +51,4 @@ EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
+LOCALES = _load('locales.json')
diff --git a/searx/data/locales.json b/searx/data/locales.json
new file mode 100644
index 000000000..cb45b1601
--- /dev/null
+++ b/searx/data/locales.json
@@ -0,0 +1,69 @@
+{
+ "LOCALE_NAMES": {
+ "af": "Afrikaans",
+ "ar": "العربية (Arabic)",
+ "bg": "Български (Bulgarian)",
+ "bn": "বাংলা (Bangla)",
+ "bo": "བོད་སྐད་ (Tibetan)",
+ "ca": "Català (Catalan)",
+ "cs": "Čeština (Czech)",
+ "cy": "Cymraeg (Welsh)",
+ "da": "Dansk (Danish)",
+ "de": "Deutsch (German)",
+ "dv": "ދިވެހި (Dhivehi)",
+ "el-GR": "Ελληνικά, Ελλάδα (Greek, Greece)",
+ "en": "English",
+ "eo": "Esperanto",
+ "es": "Español (Spanish)",
+ "et": "Eesti (Estonian)",
+ "eu": "Euskara (Basque)",
+ "fa-IR": "فارسی, ایران (Persian, Iran)",
+ "fi": "Suomi (Finnish)",
+ "fil": "Filipino",
+ "fr": "Français (French)",
+ "gl": "Galego (Galician)",
+ "he": "עברית (Hebrew)",
+ "hr": "Hrvatski (Croatian)",
+ "hu": "Magyar (Hungarian)",
+ "ia": "Interlingua",
+ "id": "Indonesia (Indonesian)",
+ "it": "Italiano (Italian)",
+ "ja": "日本語 (Japanese)",
+ "ko": "한국어 (Korean)",
+ "lt": "Lietuvių (Lithuanian)",
+ "lv": "Latviešu (Latvian)",
+ "ml": "മലയാളം (Malayalam)",
+ "ms": "Melayu (Malay)",
+ "nb-NO": "Norsk bokmål, Norge (Norwegian bokmål, Norway)",
+ "nl": "Nederlands (Dutch)",
+ "nl-BE": "Nederlands, België (Dutch, Belgium)",
+ "oc": "Occitan",
+ "pa": "ਪੰਜਾਬੀ (Punjabi)",
+ "pap": "Papiamento",
+ "pl": "Polski (Polish)",
+ "pt": "Português (Portuguese)",
+ "pt-BR": "Português, Brasil (Portuguese, Brazil)",
+ "ro": "Română (Romanian)",
+ "ru": "Русский (Russian)",
+ "si": "සිංහල (Sinhala)",
+ "sk": "Slovenčina (Slovak)",
+ "sl": "Slovenščina (Slovenian)",
+ "sr": "Српски (Serbian)",
+ "sv": "Svenska (Swedish)",
+ "szl": "Ślōnski (Silesian)",
+ "ta": "தமிழ் (Tamil)",
+ "te": "తెలుగు (Telugu)",
+ "th": "ไทย (Thai)",
+ "tr": "Türkçe (Turkish)",
+ "uk": "Українська (Ukrainian)",
+ "vi": "Tiếng việt (Vietnamese)",
+ "zh-HK": "中文, 中國香港特別行政區 (Chinese, Hong Kong SAR China)",
+ "zh-Hans-CN": "中文, 中国 (Chinese, China)",
+ "zh-Hant-TW": "中文, 台灣 (Chinese, Taiwan)"
+ },
+ "RTL_LOCALES": [
+ "fa-IR",
+ "ar",
+ "he"
+ ]
+} \ No newline at end of file
diff --git a/searx/locales.py b/searx/locales.py
index 655f365ab..c2fa030b1 100644
--- a/searx/locales.py
+++ b/searx/locales.py
@@ -1,12 +1,36 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Initialize :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.
"""
+SearXNG’s locale data
+=====================
-from typing import Set, Optional, List
-import os
-import pathlib
+The variables :py:obj:`RTL_LOCALES` and :py:obj:`LOCALE_NAMES` are loaded from
+:origin:`searx/data/locales.json` / see :py:obj:`locales_initialize` and
+:ref:`update_locales.py`.
+
+.. hint::
+
+ Whenever the value of :py:obj:`ADDITIONAL_TRANSLATIONS` or
+ :py:obj:`LOCALE_BEST_MATCH` is modified, the
+ :origin:`searx/data/locales.json` needs to be rebuilt::
+
+ ./manage data.locales
+
+SearXNG's locale codes
+======================
+
+.. automodule:: searx.sxng_locales
+ :members:
+
+
+SearXNG’s locale implementations
+================================
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
import babel
from babel.support import Translations
@@ -15,7 +39,11 @@ import babel.core
import flask_babel
import flask
from flask.ctx import has_request_context
-from searx import logger
+from searx import (
+ data,
+ logger,
+ searx_dir,
+)
logger = logger.getChild('locales')
@@ -30,7 +58,7 @@ LOCALE_NAMES = {}
:meta hide-value:
"""
-RTL_LOCALES: Set[str] = set()
+RTL_LOCALES: set[str] = set()
"""List of *Right-To-Left* locales e.g. 'he' or 'fa-IR' (see
:py:obj:`locales_initialize`)."""
@@ -52,7 +80,7 @@ LOCALE_BEST_MATCH = {
"pap": "pt-BR",
}
"""Map a locale we do not have a translations for to a locale we have a
-translation for. By example: use Taiwan version of the translation for Hong
+translation for. For example: use the Taiwan version of the translation for Hong
Kong."""
@@ -90,74 +118,37 @@ def get_translations():
return _flask_babel_get_translations()
-def get_locale_descr(locale, locale_name):
- """Get locale name e.g. 'Français - fr' or 'Português (Brasil) - pt-BR'
+_TR_LOCALES: list[str] = []
- :param locale: instance of :py:class:`Locale`
- :param locale_name: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
- """
-
- native_language, native_territory = _get_locale_descr(locale, locale_name)
- english_language, english_territory = _get_locale_descr(locale, 'en')
-
- if native_territory == english_territory:
- english_territory = None
- if not native_territory and not english_territory:
- if native_language == english_language:
- return native_language
- return native_language + ' (' + english_language + ')'
+def get_translation_locales() -> list[str]:
+ """Returns the list of translation locales (*underscore*). The list is
+ generated from the translation folders in :origin:`searx/translations`"""
- result = native_language + ', ' + native_territory + ' (' + english_language
- if english_territory:
- return result + ', ' + english_territory + ')'
- return result + ')'
+ global _TR_LOCALES # pylint:disable=global-statement
+ if _TR_LOCALES:
+ return _TR_LOCALES
-
-def _get_locale_descr(locale, language_code):
- language_name = locale.get_language_name(language_code).capitalize()
- if language_name and ('a' <= language_name[0] <= 'z'):
- language_name = language_name.capitalize()
- territory_name = locale.get_territory_name(language_code)
- return language_name, territory_name
+ tr_locales = []
+ for folder in (Path(searx_dir) / 'translations').iterdir():
+ if not folder.is_dir():
+ continue
+ if not (folder / 'LC_MESSAGES').is_dir():
+ continue
+ tr_locales.append(folder.name)
+ _TR_LOCALES = sorted(tr_locales)
+ return _TR_LOCALES
-def locales_initialize(directory=None):
+def locales_initialize():
"""Initialize locales environment of the SearXNG session.
- monkey patch :py:obj:`flask_babel.get_translations` by :py:obj:`get_translations`
- init global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`
"""
-
- directory = directory or pathlib.Path(__file__).parent / 'translations'
- logger.debug("locales_initialize: %s", directory)
flask_babel.get_translations = get_translations
-
- for tag, descr in ADDITIONAL_TRANSLATIONS.items():
- locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
- LOCALE_NAMES[tag] = descr
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
-
- for tag in LOCALE_BEST_MATCH:
- descr = LOCALE_NAMES.get(tag)
- if not descr:
- locale = babel.Locale.parse(tag, sep='-')
- LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
-
- for dirname in sorted(os.listdir(directory)):
- # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations
- if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')):
- continue
- tag = dirname.replace('_', '-')
- descr = LOCALE_NAMES.get(tag)
- if not descr:
- locale = babel.Locale.parse(dirname)
- LOCALE_NAMES[tag] = get_locale_descr(locale, dirname)
- if locale.text_direction == 'rtl':
- RTL_LOCALES.add(tag)
+ LOCALE_NAMES.update(data.LOCALES["LOCALE_NAMES"])
+ RTL_LOCALES.update(data.LOCALES["RTL_LOCALES"])
def region_tag(locale: babel.Locale) -> str:
@@ -177,7 +168,7 @@ def language_tag(locale: babel.Locale) -> str:
return sxng_lang
-def get_locale(locale_tag: str) -> Optional[babel.Locale]:
+def get_locale(locale_tag: str) -> babel.Locale | None:
"""Returns a :py:obj:`babel.Locale` object parsed from argument
``locale_tag``"""
try:
@@ -190,7 +181,7 @@ def get_locale(locale_tag: str) -> Optional[babel.Locale]:
def get_official_locales(
territory: str, languages=None, regional: bool = False, de_facto: bool = True
-) -> Set[babel.Locale]:
+) -> set[babel.Locale]:
"""Returns a list of :py:obj:`babel.Locale` with languages from
:py:obj:`babel.languages.get_official_languages`.
@@ -376,7 +367,7 @@ def get_engine_locale(searxng_locale, engine_locales, default=None):
return default
-def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Optional[str] = None) -> Optional[str]:
+def match_locale(searxng_locale: str, locale_tag_list: list[str], fallback: str | None = None) -> str | None:
"""Return tag from ``locale_tag_list`` that best fits to ``searxng_locale``.
:param str searxng_locale: SearXNG's internal representation of locale (de,
@@ -425,7 +416,7 @@ def match_locale(searxng_locale: str, locale_tag_list: List[str], fallback: Opti
return get_engine_locale(searxng_locale, engine_locales, default=fallback)
-def build_engine_locales(tag_list: List[str]):
+def build_engine_locales(tag_list: list[str]):
"""From a list of locale tags a dictionary is build that can be passed by
argument ``engine_locales`` to :py:obj:`get_engine_locale`. This function
is mainly used by :py:obj:`match_locale` and is similar to what the
diff --git a/searx/sxng_locales.py b/searx/sxng_locales.py
index 1ea673d7c..27f892386 100644
--- a/searx/sxng_locales.py
+++ b/searx/sxng_locales.py
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
-This file is generated automatically by::
+.. hint::
- ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+ Don't modify this file, this file is generated by::
+
+ ./manage data.traits
'''
sxng_locales = (
diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py
index 46892cc2b..faab198d2 100755
--- a/searxng_extra/update/update_engine_traits.py
+++ b/searxng_extra/update/update_engine_traits.py
@@ -31,9 +31,11 @@ languages_file_header = """\
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
-This file is generated automatically by::
+.. hint::
- ./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
+ Don't modify this file, this file is generated by::
+
+ ./manage data.traits
'''
sxng_locales = (
diff --git a/searxng_extra/update/update_locales.py b/searxng_extra/update/update_locales.py
new file mode 100644
index 000000000..e823ebaf1
--- /dev/null
+++ b/searxng_extra/update/update_locales.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Update locale names in :origin:`searx/data/locales.json` used by
+:ref:`searx.locales`
+
+- :py:obj:`searx.locales.RTL_LOCALES`
+- :py:obj:`searx.locales.LOCALE_NAMES`
+"""
+from __future__ import annotations
+
+from typing import Set
+import json
+from pathlib import Path
+import os
+
+import babel
+import babel.languages
+import babel.core
+
+from searx import searx_dir
+from searx.locales import (
+ ADDITIONAL_TRANSLATIONS,
+ LOCALE_BEST_MATCH,
+ get_translation_locales,
+)
+
+LOCALE_DATA_FILE = Path(searx_dir) / 'data' / 'locales.json'
+TRANSLATOINS_FOLDER = Path(searx_dir) / 'translations'
+
+
+def main():
+    """Regenerate :origin:`searx/data/locales.json`.
+
+    Collects the display names (``LOCALE_NAMES``) and the right-to-left tags
+    (``RTL_LOCALES``) from three sources, in this order:
+
+    1. the tags of ``ADDITIONAL_TRANSLATIONS``: the description is taken as it
+       is, the text direction comes from the locale the tag is mapped to in
+       ``LOCALE_BEST_MATCH``,
+    2. the tags of ``LOCALE_BEST_MATCH`` and
+    3. the translation folders returned by ``get_translation_locales``.
+
+    A tag that already got a description in an earlier step is skipped.  The
+    result is written as JSON to ``LOCALE_DATA_FILE``.
+    """
+
+    LOCALE_NAMES = {}
+    RTL_LOCALES: Set[str] = set()
+
+    for tag, descr in ADDITIONAL_TRANSLATIONS.items():
+        locale = babel.Locale.parse(LOCALE_BEST_MATCH[tag], sep='-')
+        LOCALE_NAMES[tag] = descr
+        if locale.text_direction == 'rtl':
+            RTL_LOCALES.add(tag)
+
+    for tag in LOCALE_BEST_MATCH:
+        descr = LOCALE_NAMES.get(tag)
+        if not descr:
+            locale = babel.Locale.parse(tag, sep='-')
+            LOCALE_NAMES[tag] = get_locale_descr(locale, tag.replace('-', '_'))
+            if locale.text_direction == 'rtl':
+                RTL_LOCALES.add(tag)
+
+    for tr_locale in get_translation_locales():
+        # folder names use the underscore, SearXNG tags use the hyphen
+        sxng_tag = tr_locale.replace('_', '-')
+        descr = LOCALE_NAMES.get(sxng_tag)
+        if not descr:
+            locale = babel.Locale.parse(tr_locale)
+            LOCALE_NAMES[sxng_tag] = get_locale_descr(locale, tr_locale)
+            if locale.text_direction == 'rtl':
+                RTL_LOCALES.add(sxng_tag)
+
+    content = {
+        "LOCALE_NAMES": LOCALE_NAMES,
+        # A set has no stable iteration order and ``sort_keys`` only sorts
+        # dictionary keys, so sort the tags explicitly: otherwise the
+        # generated file can differ from run to run without any real change.
+        "RTL_LOCALES": sorted(RTL_LOCALES),
+    }
+
+    with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
+        json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
+
+
+def get_locale_descr(locale: babel.Locale, tr_locale):
+    """Build the description of a locale: the native name followed by the
+    English name in parentheses, e.g. ``'Français (French)'`` or
+    ``'Português, Brasil (Portuguese, Brazil)'``.
+
+    The English part is dropped when it equals the native language name (e.g.
+    ``'English'``), and an English territory name equal to the native one is
+    not repeated.
+
+    :param locale: instance of :py:class:`Locale`
+    :param tr_locale: name e.g. 'fr' or 'pt_BR' (delimiter is *underscore*)
+    """
+
+    native_lang, native_terr = _get_locale_descr(locale, tr_locale)
+    english_lang, english_terr = _get_locale_descr(locale, 'en')
+
+    # don't print the same territory name twice
+    if english_terr == native_terr:
+        english_terr = None
+
+    if native_terr or english_terr:
+        descr = native_lang + ', ' + native_terr + ' (' + english_lang
+        if english_terr:
+            descr = descr + ', ' + english_terr
+        return descr + ')'
+
+    # no territory name at all
+    if native_lang == english_lang:
+        return native_lang
+    return native_lang + ' (' + english_lang + ')'
+
+
+def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:
+    """Return the language name and the territory name of ``locale`` as
+    reported by ``locale.get_language_name(tr_locale)`` and
+    ``locale.get_territory_name(tr_locale)``.
+
+    The language name is passed through :py:obj:`str.capitalize`: the first
+    character is upper-cased and the remaining characters are lower-cased.  The
+    territory name is returned unchanged.
+    """
+    # A single capitalize() is enough: afterwards the first character can no
+    # longer be a lower-case ASCII letter.
+    language: str = locale.get_language_name(tr_locale).capitalize()  # type: ignore
+    territory: str = locale.get_territory_name(tr_locale)  # type: ignore
+    return language, territory
+
+
+if __name__ == "__main__":
+ main()
diff --git a/utils/lib_sxng_data.sh b/utils/lib_sxng_data.sh
index 549e6dbec..50a932f6d 100755
--- a/utils/lib_sxng_data.sh
+++ b/utils/lib_sxng_data.sh
@@ -7,6 +7,7 @@ data.:
all : update searx/sxng_locales.py and searx/data/*
traits : update searx/data/engine_traits.json & searx/sxng_locales.py
useragents: update searx/data/useragents.json with the most recent versions of Firefox
+ locales : update searx/data/locales.json from babel
EOF
}
@@ -16,6 +17,7 @@ data.all() {
pyenv.activate
data.traits
data.useragents
+ data.locales
build_msg DATA "update searx/data/osm_keys_tags.json"
pyenv.cmd python searxng_extra/update/update_osm_keys_tags.py
@@ -49,6 +51,15 @@ data.useragents() {
dump_return $?
}
+# Regenerate searx/data/locales.json by running
+# searxng_extra/update/update_locales.py.  The commands run in a subshell
+# with `set -e`; the exit status of that subshell is handed to dump_return.
+data.locales() {
+ ( set -e
+ pyenv.activate
+ build_msg DATA "update searx/data/locales.json"
+ python searxng_extra/update/update_locales.py
+ )
+ dump_return $?
+}
+
docs.prebuild() {
build_msg DOCS "build ${DOCS_BUILD}/includes"
(
diff --git a/utils/lib_sxng_weblate.sh b/utils/lib_sxng_weblate.sh
index f52b75d7c..f2b19257c 100755
--- a/utils/lib_sxng_weblate.sh
+++ b/utils/lib_sxng_weblate.sh
@@ -96,10 +96,15 @@ weblate.translations.commit() {
build_msg BABEL 'compile translation catalogs into binary MO files'
pybabel compile --statistics \
-d "searx/translations"
+
+ # update searx/data/locales.json
+ data.locales
+
# git add/commit (no push)
commit_body=$(cd "${TRANSLATIONS_WORKTREE}"; git log --pretty=format:'%h - %as - %aN <%ae>' "${existing_commit_hash}..HEAD")
commit_message=$(echo -e "[translations] update from Weblate\n\n${commit_body}")
git add searx/translations
+ git add searx/data/locales.json
git commit -m "${commit_message}"
)
exitcode=$?