2 files changed, 111 insertions, 33 deletions
diff --git a/tests/unit/test_locales.py b/tests/unit/test_locales.py
new file mode 100644
index 000000000..61561c17b
--- /dev/null
+++ b/tests/unit/test_locales.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Test some code from module :py:obj:`searx.locales`"""
+
+from searx import locales
+from searx.sxng_locales import sxng_locales
+from tests import SearxTestCase
+
+
+class TestLocales(SearxTestCase):
+    """Implemented tests:
+
+    - :py:obj:`searx.locales.match_locale`
+    """
+
+    def test_match_locale(self):
+
+        locale_tag_list = [x[0] for x in sxng_locales]
+
+        # Test SearXNG search languages
+
+        self.assertEqual(locales.match_locale('de', locale_tag_list), 'de')
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr')
+        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh')
+
+        # Test SearXNG search regions
+
+        self.assertEqual(locales.match_locale('ca-es', locale_tag_list), 'ca-ES')
+        self.assertEqual(locales.match_locale('de-at', locale_tag_list), 'de-AT')
+        self.assertEqual(locales.match_locale('de-de', locale_tag_list), 'de-DE')
+        self.assertEqual(locales.match_locale('en-UK', locale_tag_list), 'en-GB')
+        self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+        self.assertEqual(locales.match_locale('fr-be', locale_tag_list), 'fr-BE')
+        self.assertEqual(locales.match_locale('fr-ca', locale_tag_list), 'fr-CA')
+        self.assertEqual(locales.match_locale('fr-ch', locale_tag_list), 'fr-CH')
+        self.assertEqual(locales.match_locale('zh-cn', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-tw', locale_tag_list), 'zh-TW')
+        self.assertEqual(locales.match_locale('zh-hk', locale_tag_list), 'zh-HK')
+
+        # Test language script code
+
+        self.assertEqual(locales.match_locale('zh-hans', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-hans-cn', locale_tag_list), 'zh-CN')
+        self.assertEqual(locales.match_locale('zh-hant', locale_tag_list), 'zh-TW')
+        self.assertEqual(locales.match_locale('zh-hant-tw', locale_tag_list), 'zh-TW')
+
+        # Test individual locale lists
+
+        self.assertEqual(locales.match_locale('es', [], fallback='fallback'), 'fallback')
+
+        self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+        self.assertEqual(locales.match_locale('de', ['de-CH', 'de-DE']), 'de-DE')
+        self.assertEqual(locales.match_locale('es', ['ES']), 'ES')
+        self.assertEqual(locales.match_locale('es', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
+        self.assertEqual(locales.match_locale('es-AR', ['es-AR', 'es-ES', 'es-MX']), 'es-AR')
+        self.assertEqual(locales.match_locale('es-CO', ['es-AR', 'es-ES']), 'es-ES')
+        self.assertEqual(locales.match_locale('es-CO', ['es-AR']), 'es-AR')
+
+        # Tests from the commit message of 9ae409a05a
+
+        # Assumption:
+        #   A. When a user selects a language the results should be optimized according to
+        #      the selected language.
+        #
+        #   B. When user selects a language and a territory the results should be
+        #      optimized with first priority on territory and second on language.
+
+        # Assume we have an engine that supports the following locales:
+        locale_tag_list = ['zh-CN', 'zh-HK', 'nl-BE', 'fr-CA']
+
+        # Examples (Assumption A.)
+        # ------------------------
+
+        # A user selects region 'zh-TW' which should end in zh_HK.
+        # hint: CN is 'Hans' and HK ('Hant') fits better to TW ('Hant')
+        self.assertEqual(locales.match_locale('zh-TW', locale_tag_list), 'zh-HK')
+
+        # A user selects only the language 'zh' which should end in CN
+        self.assertEqual(locales.match_locale('zh', locale_tag_list), 'zh-CN')
+
+        # A user selects only the language 'fr' which should end in fr_CA
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-CA')
+
+        # The difference in priority on the territory is best shown with an
+        # engine that supports the following locales:
+        locale_tag_list = ['fr-FR', 'fr-CA', 'en-GB', 'nl-BE']
+
+        # A user selects only a language
+        self.assertEqual(locales.match_locale('en', locale_tag_list), 'en-GB')
+
+        # hint: the engine supports fr_FR and fr_CA; since no territory is given,
+        # fr_FR takes priority.
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-FR')
+
+        # Examples (Assumption B.)
+        # ------------------------
+
+        #  A user selects region 'fr-BE' which should end in nl-BE
+        self.assertEqual(locales.match_locale('fr-BE', locale_tag_list), 'nl-BE')
+
+        # If the user selects a language and there are two locales like the
+        # following:
+
+        locale_tag_list = ['fr-BE', 'fr-CH']
+
+        # The get_engine_locale selects the locale by looking at the "population
+        # percent", and this percentage is higher in BE (68%) than in
+        # CH (21%).
+
+        self.assertEqual(locales.match_locale('fr', locale_tag_list), 'fr-BE')
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 6f51f1ee3..2ad4593a1 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -87,39 +87,6 @@ class TestUtils(SearxTestCase):
         html = '<p><b>Lorem ipsum</i>dolor sit amet</p>'
         self.assertEqual(utils.html_to_text(html), "Lorem ipsum")
 
-    def test_match_language(self):
-        self.assertEqual(utils.match_language('es', ['es']), 'es')
-        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
-        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
-
-        # handle script tags
-        self.assertEqual(utils.match_language('zh-CN', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hans-CN')
-        self.assertEqual(utils.match_language('zh-TW', ['zh-Hans-CN', 'zh-Hant-TW']), 'zh-Hant-TW')
-        self.assertEqual(utils.match_language('zh-Hans-CN', ['zh-CN', 'zh-TW']), 'zh-CN')
-        self.assertEqual(utils.match_language('zh-Hant-TW', ['zh-CN', 'zh-TW']), 'zh-TW')
-        self.assertEqual(utils.match_language('zh-Hans', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-CN')
-        self.assertEqual(utils.match_language('zh-Hant', ['zh-CN', 'zh-TW', 'zh-HK']), 'zh-TW')
-
-        aliases = {'en-GB': 'en-UK', 'he': 'iw'}
-
-        # guess country
-        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
-        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
-        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
-        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
-        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
-        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
-
-        # language aliases
-        self.assertEqual(utils.match_language('iw', ['he']), 'he')
-        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
-        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
-        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
-        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
-        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
-        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
-        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
-
     def test_ecma_unscape(self):
         self.assertEqual(utils.ecma_unescape('text%20with%20space'), 'text with space')
         self.assertEqual(utils.ecma_unescape('text using %xx: %F3'), 'text using %xx: ó')