diff options
author | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-02-28 22:30:48 -0600 |
---|---|---|
committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2018-03-27 00:08:03 -0600 |
commit | 772c048d01c7585fd60afca1ce30a1914e6e5b4a (patch) | |
tree | 96a5662897df2bcf0ab53456e0a67ace998f2169 /tests | |
parent | d1eae9359f8c5920632a730744ea2208070f06da (diff) | |
download | searxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.tar.gz searxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.zip |
refactor engine's search language handling
Add match_language function in utils to match any user-given
language code against a list of an engine's supported languages.
Also add language_aliases dict on each engine to translate
standard language codes into the custom codes used by the engine.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unit/engines/test_archlinux.py | 7 | ||||
-rw-r--r-- | tests/unit/engines/test_bing.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_bing_images.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_bing_news.py | 3 | ||||
-rw-r--r-- | tests/unit/engines/test_bing_videos.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_dailymotion.py | 3 | ||||
-rw-r--r-- | tests/unit/engines/test_duckduckgo.py | 18 | ||||
-rw-r--r-- | tests/unit/engines/test_duckduckgo_definitions.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_duckduckgo_images.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_google.py | 7 | ||||
-rw-r--r-- | tests/unit/engines/test_google_news.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_qwant.py | 2 | ||||
-rw-r--r-- | tests/unit/engines/test_swisscows.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_wikidata.py | 1 | ||||
-rw-r--r-- | tests/unit/engines/test_yahoo.py | 17 | ||||
-rw-r--r-- | tests/unit/engines/test_yahoo_news.py | 3 | ||||
-rw-r--r-- | tests/unit/test_utils.py | 25 |
17 files changed, 76 insertions, 17 deletions
diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py index f9e536f4e..f2ba483c1 100644 --- a/tests/unit/engines/test_archlinux.py +++ b/tests/unit/engines/test_archlinux.py @@ -19,12 +19,17 @@ class TestArchLinuxEngine(SearxTestCase): query = 'test_query' dic = defaultdict(dict) dic['pageno'] = 1 - dic['language'] = 'en_US' + dic['language'] = 'en-US' params = archlinux.request(query, dic) self.assertTrue('url' in params) self.assertTrue(query in params['url']) self.assertTrue('wiki.archlinux.org' in params['url']) + for lang, name in archlinux.main_langs: + dic['language'] = lang + params = archlinux.request(query, dic) + self.assertTrue(name in params['url']) + for lang, domain in domains.items(): dic['language'] = lang params = archlinux.request(query, dic) diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py index 2528dd847..48a5e744a 100644 --- a/tests/unit/engines/test_bing.py +++ b/tests/unit/engines/test_bing.py @@ -7,6 +7,7 @@ from searx.testing import SearxTestCase class TestBingEngine(SearxTestCase): def test_request(self): + bing.supported_languages = ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR'] query = u'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 diff --git a/tests/unit/engines/test_bing_images.py b/tests/unit/engines/test_bing_images.py index 3f3006124..afc4cd6f0 100644 --- a/tests/unit/engines/test_bing_images.py +++ b/tests/unit/engines/test_bing_images.py @@ -9,7 +9,6 @@ class TestBingImagesEngine(SearxTestCase): def test_request(self): bing_images.supported_languages = ['fr-FR', 'en-US'] - query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py index 3af19fd6d..8fc26ee32 100644 --- a/tests/unit/engines/test_bing_news.py +++ b/tests/unit/engines/test_bing_news.py @@ -8,10 +8,11 @@ import lxml class TestBingNewsEngine(SearxTestCase): def test_request(self): + 
bing_news.supported_languages = ['en', 'fr'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' dicto['time_range'] = '' params = bing_news.request(query, dicto) self.assertIn('url', params) diff --git a/tests/unit/engines/test_bing_videos.py b/tests/unit/engines/test_bing_videos.py index 8b303d637..24387c888 100644 --- a/tests/unit/engines/test_bing_videos.py +++ b/tests/unit/engines/test_bing_videos.py @@ -9,7 +9,6 @@ class TestBingVideosEngine(SearxTestCase): def test_request(self): bing_videos.supported_languages = ['fr-FR', 'en-US'] - query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py index 2009c0e4f..803b5c4d2 100644 --- a/tests/unit/engines/test_dailymotion.py +++ b/tests/unit/engines/test_dailymotion.py @@ -8,10 +8,11 @@ from searx.testing import SearxTestCase class TestDailymotionEngine(SearxTestCase): def test_request(self): + dailymotion.supported_languages = ['en', 'fr'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' params = dailymotion.request(query, dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index eea478971..eb316a404 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -1,18 +1,21 @@ # -*- coding: utf-8 -*- from collections import defaultdict import mock -from searx.engines import duckduckgo +from searx.engines import load_engine, duckduckgo from searx.testing import SearxTestCase class TestDuckduckgoEngine(SearxTestCase): def test_request(self): + duckduckgo = load_engine({'engine': 'duckduckgo', 'name': 'duckduckgo'}) + query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'de-CH' 
dicto['time_range'] = '' + + dicto['language'] = 'de-CH' params = duckduckgo.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) @@ -20,16 +23,19 @@ class TestDuckduckgoEngine(SearxTestCase): self.assertIn('ch-de', params['url']) self.assertIn('s=0', params['url']) - # when ddg uses non standard code + # when ddg uses non standard codes + dicto['language'] = 'zh-HK' + params = duckduckgo.request(query, dicto) + self.assertIn('hk-tzh', params['url']) + dicto['language'] = 'en-GB' params = duckduckgo.request(query, dicto) self.assertIn('uk-en', params['url']) # no country given - duckduckgo.supported_languages = ['de-CH', 'en-US'] - dicto['language'] = 'de' + dicto['language'] = 'en' params = duckduckgo.request(query, dicto) - self.assertIn('ch-de', params['url']) + self.assertIn('us-en', params['url']) def test_no_url_in_request_year_time_range(self): dicto = defaultdict(dict) diff --git a/tests/unit/engines/test_duckduckgo_definitions.py b/tests/unit/engines/test_duckduckgo_definitions.py index feafe47ba..37587ed8d 100644 --- a/tests/unit/engines/test_duckduckgo_definitions.py +++ b/tests/unit/engines/test_duckduckgo_definitions.py @@ -18,6 +18,7 @@ class TestDDGDefinitionsEngine(SearxTestCase): self.assertEqual(result, 'Text in link') def test_request(self): + duckduckgo_definitions.supported_languages = ['en-US', 'es-ES'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_duckduckgo_images.py b/tests/unit/engines/test_duckduckgo_images.py index 582163130..5301057fd 100644 --- a/tests/unit/engines/test_duckduckgo_images.py +++ b/tests/unit/engines/test_duckduckgo_images.py @@ -9,7 +9,6 @@ class TestDuckduckgoImagesEngine(SearxTestCase): def test_request(self): duckduckgo_images.supported_languages = ['de-CH', 'en-US'] - query = 'test_query' dicto = defaultdict(dict) dicto['is_test'] = True diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py index 
ecd1ed4d9..33556cc7a 100644 --- a/tests/unit/engines/test_google.py +++ b/tests/unit/engines/test_google.py @@ -15,6 +15,8 @@ class TestGoogleEngine(SearxTestCase): return response def test_request(self): + google.supported_languages = ['en', 'fr', 'zh-CN'] + query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 @@ -31,6 +33,11 @@ class TestGoogleEngine(SearxTestCase): self.assertIn('google.co', params['url']) self.assertIn('en', params['headers']['Accept-Language']) + dicto['language'] = 'zh' + params = google.request(query, dicto) + self.assertIn('google.com', params['url']) + self.assertIn('zh-CN', params['headers']['Accept-Language']) + def test_response(self): self.assertRaises(AttributeError, google.response, None) self.assertRaises(AttributeError, google.response, []) diff --git a/tests/unit/engines/test_google_news.py b/tests/unit/engines/test_google_news.py index 3769e3be6..fbc6d344d 100644 --- a/tests/unit/engines/test_google_news.py +++ b/tests/unit/engines/test_google_news.py @@ -9,6 +9,7 @@ from searx.testing import SearxTestCase class TestGoogleNewsEngine(SearxTestCase): def test_request(self): + google_news.supported_languages = ['en-US', 'fr-FR'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py index 46694988c..86bfb22da 100644 --- a/tests/unit/engines/test_qwant.py +++ b/tests/unit/engines/test_qwant.py @@ -7,6 +7,7 @@ from searx.testing import SearxTestCase class TestQwantEngine(SearxTestCase): def test_request(self): + qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 @@ -26,7 +27,6 @@ class TestQwantEngine(SearxTestCase): self.assertIn('en_us', params['url']) self.assertIn('news', params['url']) - qwant.supported_languages = ['en', 'fr-FR', 'fr-CA'] dicto['language'] = 'fr' params = qwant.request(query, dicto) self.assertIn('fr_fr', params['url']) diff --git 
a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index 2715ef52e..133f636de 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -7,6 +7,7 @@ from searx.testing import SearxTestCase class TestSwisscowsEngine(SearxTestCase): def test_request(self): + swisscows.supported_languages = ['de-AT', 'de-DE'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index 453133b64..1ad21768c 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -9,6 +9,7 @@ from searx.testing import SearxTestCase class TestWikidataEngine(SearxTestCase): def test_request(self): + wikidata.supported_languages = ['en', 'es'] query = 'test_query' dicto = defaultdict(dict) dicto['language'] = 'en-US' diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py index 921d3e8cd..5037bfc7d 100644 --- a/tests/unit/engines/test_yahoo.py +++ b/tests/unit/engines/test_yahoo.py @@ -25,11 +25,12 @@ class TestYahooEngine(SearxTestCase): self.assertEqual('https://this.is.the.url/', url) def test_request(self): + yahoo.supported_languages = ['en', 'fr', 'zh-CHT', 'zh-CHS'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 dicto['time_range'] = '' - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' params = yahoo.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) @@ -39,6 +40,16 @@ class TestYahooEngine(SearxTestCase): self.assertIn('sB', params['cookies']) self.assertIn('fr', params['cookies']['sB']) + dicto['language'] = 'zh' + params = yahoo.request(query, dicto) + self.assertIn('zh_chs', params['url']) + self.assertIn('zh_chs', params['cookies']['sB']) + + dicto['language'] = 'zh-TW' + params = yahoo.request(query, dicto) + self.assertIn('zh_cht', params['url']) + self.assertIn('zh_cht', params['cookies']['sB']) + def 
test_no_url_in_request_year_time_range(self): dicto = defaultdict(dict) query = 'test_query' @@ -168,5 +179,5 @@ class TestYahooEngine(SearxTestCase): self.assertEqual(type(languages), list) self.assertEqual(len(languages), 3) self.assertIn('ar', languages) - self.assertIn('zh-chs', languages) - self.assertIn('zh-cht', languages) + self.assertIn('zh-CHS', languages) + self.assertIn('zh-CHT', languages) diff --git a/tests/unit/engines/test_yahoo_news.py b/tests/unit/engines/test_yahoo_news.py index bc87ec067..c3297dacf 100644 --- a/tests/unit/engines/test_yahoo_news.py +++ b/tests/unit/engines/test_yahoo_news.py @@ -9,10 +9,11 @@ from searx.testing import SearxTestCase class TestYahooNewsEngine(SearxTestCase): def test_request(self): + yahoo_news.supported_languages = ['en', 'fr'] query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 - dicto['language'] = 'fr_FR' + dicto['language'] = 'fr-FR' params = yahoo_news.request(query, dicto) self.assertIn('url', params) self.assertIn(query, params['url']) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index eb40e62e2..4854636c7 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -65,6 +65,31 @@ class TestUtils(SearxTestCase): for test_url, expected in data: self.assertEqual(utils.prettify_url(test_url, max_length=32), expected) + def test_match_language(self): + self.assertEqual(utils.match_language('es', ['es']), 'es') + self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback') + self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp') + + aliases = {'en-GB': 'en-UK', 'he': 'iw'} + + # guess country + self.assertEqual(utils.match_language('de-DE', ['de']), 'de') + self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE') + self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES') + self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX') + self.assertEqual(utils.match_language('en-UK', 
['en-AU', 'en-GB', 'en-US']), 'en-GB') + self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK') + + # language aliases + self.assertEqual(utils.match_language('iw', ['he']), 'he') + self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw') + self.assertEqual(utils.match_language('iw-IL', ['he']), 'he') + self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw') + self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL') + self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL') + self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL') + self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL') + class TestHTMLTextExtractor(SearxTestCase): |