diff options
author | Markus Heiser <markus.heiser@darmarit.de> | 2023-02-07 14:11:58 +0100 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarit.de> | 2023-03-24 10:37:42 +0100 |
commit | 16f0db44939c23d2980d6fd2e5dfada13d8f5ee9 (patch) | |
tree | aff653cf2739f3d1fc35ac44b7adaa3ca0e14253 /tests | |
parent | 4d4aa13e1f1d254e5d57c67973a7809d9c1e21f9 (diff) | |
download | searxng-16f0db44939c23d2980d6fd2e5dfada13d8f5ee9.tar.gz searxng-16f0db44939c23d2980d6fd2e5dfada13d8f5ee9.zip |
[mod] replace utils.match_language by locales.match_locale
This patch replaces the *full of magic* ``utils.match_language`` function with
``locales.match_locale``. The ``locales.match_locale`` function is based on
``locales.build_engine_locales``, which was introduced in 9ae409a0 [1].
In the past SearXNG only supported searching by language, not by region.
This was changed a long time ago: regions were added to the SearXNG
core, but not to the engines. ``utils.match_language`` was the function that
handled the different aspects of languages/regions in the SearXNG core and the
supported *languages* in the engine. ``utils.match_language`` did this with
some magic; it works well for most use cases but fails in some edge cases.
To replace the concurrence of languages and regions in the SearXNG core,
``locales.build_engine_locales`` was introduced in 9ae409a0 [1]. With the last
patches all engines have been migrated to ``fetch_traits`` and to a
language/region concept that is based on ``locales.build_engine_locales``.
To summarize: there is no longer a need for ``utils.match_language``.
[1] https://github.com/searxng/searxng/pull/1652
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unit/test_locales.py | 111 | ||||
-rw-r--r-- | tests/unit/test_utils.py | 33 |
2 files changed, 111 insertions, 33 deletions
diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py new file mode 100644 index 000000000..61561c17b --- /dev/null +++ b/tests/unit/test_locales.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Test some code from module :py:obj:`searx.locales`""" + +from searx import locales +from searx.sxng_locales import sxng_locales +from tests import SearxTestCase + + +class TestLocales(SearxTestCase): + """Implemented tests: + + - :py:obj:`searx.locales.match_locale` + """ + + def test_match_locale(self): + + locale_tag_list = [x[0] for x in sxng_locales] + + # Test SearXNG search languages + + self.assertEqual(locales.match_locale('de', locale_tag_list), 'de') + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr') + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh') + + # Test SearXNG search regions + + self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES') + self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT') + self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE') + self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA') + self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH') + self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK') + + # Test language script code + + self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hant', 
locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW') + + # Test individual locale lists + + self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback') + + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('es', ['ES']), 'ES') + self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') + self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR') + self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES') + self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR') + + # Tests from the commit message of 9ae409a05a + + # Assumption: + # A. When a user selects a language the results should be optimized according to + # the selected language. + # + # B. When user selects a language and a territory the results should be + # optimized with first priority on territory and second on language. + + # Assume we have an engine that supports the follwoing locales: + locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA'] + + # Examples (Assumption A.) + # ------------------------ + + # A user selects region 'zh-TW' which should end in zh_HK. 
+ # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant') + self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK') + + # A user selects only the language 'zh' which should end in CN + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN') + + # A user selects only the language 'fr' which should end in fr_CA + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA') + + # The difference in priority on the territory is best shown with a + # engine that supports the following locales: + locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE'] + + # A user selects only a language + self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB') + + # hint: the engine supports fr_FR and fr_CA since no territory is given, + # fr_FR takes priority .. + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR') + + # Examples (Assumption B.) + # ------------------------ + + # A user selects region 'fr-BE' which should end in nl-BE + self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE') + + # If the user selects a language and there are two locales like the + # following: + + locale_tag_list = ['fr-BE', 'fr-CH'] + + # The get_engine_locale selects the locale by looking at the "population + # percent" and this percentage has an higher amount in BE (68.%) + # compared to CH (21%) + + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6f51f1ee3..2ad4593a1 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -87,39 +87,6 @@ class TestUtils(SearxTestCase): html = '<p><b>Lorem ipsum</i>dolor sit amet</p>' self.assertEqual(utils.html_to_text(html), "Lorem ipsum") - def test_match_language(self): - self.assertEqual(utils.match_language('es', ['es']), 'es') - self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') - 
self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') - - # handle script tags - self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN') - self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW') - self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW') - self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW') - - aliases = {'en-GB': 'en-UK', 'he': 'iw'} - - # guess country - self.assertEqual(utils.match_language('de-DE', ['de']), 'de') - self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE') - self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') - self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX') - self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB') - self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK') - - # language aliases - self.assertEqual(utils.match_language('iw', ['he']), 'he') - self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw-IL', ['he']), 'he') - self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL') - self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL') - def test_ecma_unscape(self): self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space') self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: รณ') |