diff options
Diffstat (limited to 'tests/unit')
-rw-r--r-- | tests/unit/test_locales.py | 111 | ||||
-rw-r--r-- | tests/unit/test_utils.py | 33 |
2 files changed, 111 insertions, 33 deletions
diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py new file mode 100644 index 000000000..61561c17b --- /dev/null +++ b/tests/unit/test_locales.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Test some code from module :py:obj:`searx.locales`""" + +from searx import locales +from searx.sxng_locales import sxng_locales +from tests import SearxTestCase + + +class TestLocales(SearxTestCase): + """Implemented tests: + + - :py:obj:`searx.locales.match_locale` + """ + + def test_match_locale(self): + + locale_tag_list = [x[0] for x in sxng_locales] + + # Test SearXNG search languages + + self.assertEqual(locales.match_locale('de', locale_tag_list), 'de') + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr') + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh') + + # Test SearXNG search regions + + self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES') + self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT') + self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE') + self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE') + self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA') + self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH') + self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK') + + # Test language script code + + self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN') + self.assertEqual(locales.match_locale('zh-hant', 
locale_tag_list), 'zh-TW') + self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW') + + # Test individual locale lists + + self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback') + + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE') + self.assertEqual(locales.match_locale('es', ['ES']), 'ES') + self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') + self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR') + self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES') + self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR') + + # Tests from the commit message of 9ae409a05a + + # Assumption: + # A. When a user selects a language the results should be optimized according to + # the selected language. + # + # B. When a user selects a language and a territory the results should be + # optimized with first priority on territory and second on language. + + # Assume we have an engine that supports the following locales: + locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA'] + + # Examples (Assumption A.) + # ------------------------ + + # A user selects region 'zh-TW' which should end in zh_HK. 
+ # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant') + self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK') + + # A user selects only the language 'zh' which should end in CN + self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN') + + # A user selects only the language 'fr' which should end in fr_CA + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA') + + # The difference in priority on the territory is best shown with an + # engine that supports the following locales: + locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE'] + + # A user selects only a language + self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB') + + # hint: the engine supports fr_FR and fr_CA since no territory is given, + # fr_FR takes priority .. + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR') + + # Examples (Assumption B.) + # ------------------------ + + # A user selects region 'fr-BE' which should end in nl-BE + self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE') + + # If the user selects a language and there are two locales like the + # following: + + locale_tag_list = ['fr-BE', 'fr-CH'] + + # The get_engine_locale selects the locale by looking at the "population + # percent" and this percentage has a higher amount in BE (68%) + # compared to CH (21%) + + self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 6f51f1ee3..2ad4593a1 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -87,39 +87,6 @@ class TestUtils(SearxTestCase): html = '<p><b>Lorem ipsum</i>dolor sit amet</p>' self.assertEqual(utils.html_to_text(html), "Lorem ipsum") - def test_match_language(self): - self.assertEqual(utils.match_language('es', ['es']), 'es') - self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') - 
self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') - - # handle script tags - self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN') - self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW') - self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW') - self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN') - self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW') - - aliases = {'en-GB': 'en-UK', 'he': 'iw'} - - # guess country - self.assertEqual(utils.match_language('de-DE', ['de']), 'de') - self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE') - self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') - self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX') - self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB') - self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK') - - # language aliases - self.assertEqual(utils.match_language('iw', ['he']), 'he') - self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw-IL', ['he']), 'he') - self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw') - self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL') - self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL') - self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL') - def test_ecma_unscape(self): self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space') self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó') |