summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Marc Abonce Seguin <marc-abonce@mailbox.org> 2018-02-28 22:30:48 -0600
committer: Marc Abonce Seguin <marc-abonce@mailbox.org> 2018-03-27 00:08:03 -0600
commit: 772c048d01c7585fd60afca1ce30a1914e6e5b4a (patch)
tree: 96a5662897df2bcf0ab53456e0a67ace998f2169 /tests
parent: d1eae9359f8c5920632a730744ea2208070f06da (diff)
downloadsearxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.tar.gz
searxng-772c048d01c7585fd60afca1ce30a1914e6e5b4a.zip
refactor engine's search language handling
Add a match_language function in utils to match any user-given language code against the list of an engine's supported languages. Also add a language_aliases dict to each engine to translate standard language codes into the custom codes used by that engine.
Diffstat (limited to 'tests')
-rw-r--r--tests/unit/engines/test_archlinux.py7
-rw-r--r--tests/unit/engines/test_bing.py1
-rw-r--r--tests/unit/engines/test_bing_images.py1
-rw-r--r--tests/unit/engines/test_bing_news.py3
-rw-r--r--tests/unit/engines/test_bing_videos.py1
-rw-r--r--tests/unit/engines/test_dailymotion.py3
-rw-r--r--tests/unit/engines/test_duckduckgo.py18
-rw-r--r--tests/unit/engines/test_duckduckgo_definitions.py1
-rw-r--r--tests/unit/engines/test_duckduckgo_images.py1
-rw-r--r--tests/unit/engines/test_google.py7
-rw-r--r--tests/unit/engines/test_google_news.py1
-rw-r--r--tests/unit/engines/test_qwant.py2
-rw-r--r--tests/unit/engines/test_swisscows.py1
-rw-r--r--tests/unit/engines/test_wikidata.py1
-rw-r--r--tests/unit/engines/test_yahoo.py17
-rw-r--r--tests/unit/engines/test_yahoo_news.py3
-rw-r--r--tests/unit/test_utils.py25
17 files changed, 76 insertions, 17 deletions
diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py
index f9e536f4e..f2ba483c1 100644
--- a/tests/unit/engines/test_archlinux.py
+++ b/tests/unit/engines/test_archlinux.py
@@ -19,12 +19,17 @@ class TestArchLinuxEngine(SearxTestCase):
query = 'test_query'
dic = defaultdict(dict)
dic['pageno'] = 1
- dic['language'] = 'en_US'
+ dic['language'] = 'en-US'
params = archlinux.request(query, dic)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
self.assertTrue('wiki.archlinux.org' in params['url'])
+ for lang, name in archlinux.main_langs:
+ dic['language'] = lang
+ params = archlinux.request(query, dic)
+ self.assertTrue(name in params['url'])
+
for lang, domain in domains.items():
dic['language'] = lang
params = archlinux.request(query, dic)
diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py
index 2528dd847..48a5e744a 100644
--- a/tests/unit/engines/test_bing.py
+++ b/tests/unit/engines/test_bing.py
@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
class TestBingEngine(SearxTestCase):
def test_request(self):
+ bing.supported_languages = ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR']
query = u'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
diff --git a/tests/unit/engines/test_bing_images.py b/tests/unit/engines/test_bing_images.py
index 3f3006124..afc4cd6f0 100644
--- a/tests/unit/engines/test_bing_images.py
+++ b/tests/unit/engines/test_bing_images.py
@@ -9,7 +9,6 @@ class TestBingImagesEngine(SearxTestCase):
def test_request(self):
bing_images.supported_languages = ['fr-FR', 'en-US']
-
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py
index 3af19fd6d..8fc26ee32 100644
--- a/tests/unit/engines/test_bing_news.py
+++ b/tests/unit/engines/test_bing_news.py
@@ -8,10 +8,11 @@ import lxml
class TestBingNewsEngine(SearxTestCase):
def test_request(self):
+ bing_news.supported_languages = ['en', 'fr']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
dicto['time_range'] = ''
params = bing_news.request(query, dicto)
self.assertIn('url', params)
diff --git a/tests/unit/engines/test_bing_videos.py b/tests/unit/engines/test_bing_videos.py
index 8b303d637..24387c888 100644
--- a/tests/unit/engines/test_bing_videos.py
+++ b/tests/unit/engines/test_bing_videos.py
@@ -9,7 +9,6 @@ class TestBingVideosEngine(SearxTestCase):
def test_request(self):
bing_videos.supported_languages = ['fr-FR', 'en-US']
-
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
diff --git a/tests/unit/engines/test_dailymotion.py b/tests/unit/engines/test_dailymotion.py
index 2009c0e4f..803b5c4d2 100644
--- a/tests/unit/engines/test_dailymotion.py
+++ b/tests/unit/engines/test_dailymotion.py
@@ -8,10 +8,11 @@ from searx.testing import SearxTestCase
class TestDailymotionEngine(SearxTestCase):
def test_request(self):
+ dailymotion.supported_languages = ['en', 'fr']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
params = dailymotion.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py
index eea478971..eb316a404 100644
--- a/tests/unit/engines/test_duckduckgo.py
+++ b/tests/unit/engines/test_duckduckgo.py
@@ -1,18 +1,21 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
-from searx.engines import duckduckgo
+from searx.engines import load_engine, duckduckgo
from searx.testing import SearxTestCase
class TestDuckduckgoEngine(SearxTestCase):
def test_request(self):
+ duckduckgo = load_engine({'engine': 'duckduckgo', 'name': 'duckduckgo'})
+
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'de-CH'
dicto['time_range'] = ''
+
+ dicto['language'] = 'de-CH'
params = duckduckgo.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
@@ -20,16 +23,19 @@ class TestDuckduckgoEngine(SearxTestCase):
self.assertIn('ch-de', params['url'])
self.assertIn('s=0', params['url'])
- # when ddg uses non standard code
+ # when ddg uses non standard codes
+ dicto['language'] = 'zh-HK'
+ params = duckduckgo.request(query, dicto)
+ self.assertIn('hk-tzh', params['url'])
+
dicto['language'] = 'en-GB'
params = duckduckgo.request(query, dicto)
self.assertIn('uk-en', params['url'])
# no country given
- duckduckgo.supported_languages = ['de-CH', 'en-US']
- dicto['language'] = 'de'
+ dicto['language'] = 'en'
params = duckduckgo.request(query, dicto)
- self.assertIn('ch-de', params['url'])
+ self.assertIn('us-en', params['url'])
def test_no_url_in_request_year_time_range(self):
dicto = defaultdict(dict)
diff --git a/tests/unit/engines/test_duckduckgo_definitions.py b/tests/unit/engines/test_duckduckgo_definitions.py
index feafe47ba..37587ed8d 100644
--- a/tests/unit/engines/test_duckduckgo_definitions.py
+++ b/tests/unit/engines/test_duckduckgo_definitions.py
@@ -18,6 +18,7 @@ class TestDDGDefinitionsEngine(SearxTestCase):
self.assertEqual(result, 'Text in link')
def test_request(self):
+ duckduckgo_definitions.supported_languages = ['en-US', 'es-ES']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
diff --git a/tests/unit/engines/test_duckduckgo_images.py b/tests/unit/engines/test_duckduckgo_images.py
index 582163130..5301057fd 100644
--- a/tests/unit/engines/test_duckduckgo_images.py
+++ b/tests/unit/engines/test_duckduckgo_images.py
@@ -9,7 +9,6 @@ class TestDuckduckgoImagesEngine(SearxTestCase):
def test_request(self):
duckduckgo_images.supported_languages = ['de-CH', 'en-US']
-
query = 'test_query'
dicto = defaultdict(dict)
dicto['is_test'] = True
diff --git a/tests/unit/engines/test_google.py b/tests/unit/engines/test_google.py
index ecd1ed4d9..33556cc7a 100644
--- a/tests/unit/engines/test_google.py
+++ b/tests/unit/engines/test_google.py
@@ -15,6 +15,8 @@ class TestGoogleEngine(SearxTestCase):
return response
def test_request(self):
+ google.supported_languages = ['en', 'fr', 'zh-CN']
+
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
@@ -31,6 +33,11 @@ class TestGoogleEngine(SearxTestCase):
self.assertIn('google.co', params['url'])
self.assertIn('en', params['headers']['Accept-Language'])
+ dicto['language'] = 'zh'
+ params = google.request(query, dicto)
+ self.assertIn('google.com', params['url'])
+ self.assertIn('zh-CN', params['headers']['Accept-Language'])
+
def test_response(self):
self.assertRaises(AttributeError, google.response, None)
self.assertRaises(AttributeError, google.response, [])
diff --git a/tests/unit/engines/test_google_news.py b/tests/unit/engines/test_google_news.py
index 3769e3be6..fbc6d344d 100644
--- a/tests/unit/engines/test_google_news.py
+++ b/tests/unit/engines/test_google_news.py
@@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
class TestGoogleNewsEngine(SearxTestCase):
def test_request(self):
+ google_news.supported_languages = ['en-US', 'fr-FR']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
diff --git a/tests/unit/engines/test_qwant.py b/tests/unit/engines/test_qwant.py
index 46694988c..86bfb22da 100644
--- a/tests/unit/engines/test_qwant.py
+++ b/tests/unit/engines/test_qwant.py
@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
class TestQwantEngine(SearxTestCase):
def test_request(self):
+ qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
@@ -26,7 +27,6 @@ class TestQwantEngine(SearxTestCase):
self.assertIn('en_us', params['url'])
self.assertIn('news', params['url'])
- qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
dicto['language'] = 'fr'
params = qwant.request(query, dicto)
self.assertIn('fr_fr', params['url'])
diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py
index 2715ef52e..133f636de 100644
--- a/tests/unit/engines/test_swisscows.py
+++ b/tests/unit/engines/test_swisscows.py
@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
class TestSwisscowsEngine(SearxTestCase):
def test_request(self):
+ swisscows.supported_languages = ['de-AT', 'de-DE']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py
index 453133b64..1ad21768c 100644
--- a/tests/unit/engines/test_wikidata.py
+++ b/tests/unit/engines/test_wikidata.py
@@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
class TestWikidataEngine(SearxTestCase):
def test_request(self):
+ wikidata.supported_languages = ['en', 'es']
query = 'test_query'
dicto = defaultdict(dict)
dicto['language'] = 'en-US'
diff --git a/tests/unit/engines/test_yahoo.py b/tests/unit/engines/test_yahoo.py
index 921d3e8cd..5037bfc7d 100644
--- a/tests/unit/engines/test_yahoo.py
+++ b/tests/unit/engines/test_yahoo.py
@@ -25,11 +25,12 @@ class TestYahooEngine(SearxTestCase):
self.assertEqual('https://this.is.the.url/', url)
def test_request(self):
+ yahoo.supported_languages = ['en', 'fr', 'zh-CHT', 'zh-CHS']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
dicto['time_range'] = ''
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
params = yahoo.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
@@ -39,6 +40,16 @@ class TestYahooEngine(SearxTestCase):
self.assertIn('sB', params['cookies'])
self.assertIn('fr', params['cookies']['sB'])
+ dicto['language'] = 'zh'
+ params = yahoo.request(query, dicto)
+ self.assertIn('zh_chs', params['url'])
+ self.assertIn('zh_chs', params['cookies']['sB'])
+
+ dicto['language'] = 'zh-TW'
+ params = yahoo.request(query, dicto)
+ self.assertIn('zh_cht', params['url'])
+ self.assertIn('zh_cht', params['cookies']['sB'])
+
def test_no_url_in_request_year_time_range(self):
dicto = defaultdict(dict)
query = 'test_query'
@@ -168,5 +179,5 @@ class TestYahooEngine(SearxTestCase):
self.assertEqual(type(languages), list)
self.assertEqual(len(languages), 3)
self.assertIn('ar', languages)
- self.assertIn('zh-chs', languages)
- self.assertIn('zh-cht', languages)
+ self.assertIn('zh-CHS', languages)
+ self.assertIn('zh-CHT', languages)
diff --git a/tests/unit/engines/test_yahoo_news.py b/tests/unit/engines/test_yahoo_news.py
index bc87ec067..c3297dacf 100644
--- a/tests/unit/engines/test_yahoo_news.py
+++ b/tests/unit/engines/test_yahoo_news.py
@@ -9,10 +9,11 @@ from searx.testing import SearxTestCase
class TestYahooNewsEngine(SearxTestCase):
def test_request(self):
+ yahoo_news.supported_languages = ['en', 'fr']
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
- dicto['language'] = 'fr_FR'
+ dicto['language'] = 'fr-FR'
params = yahoo_news.request(query, dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index eb40e62e2..4854636c7 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -65,6 +65,31 @@ class TestUtils(SearxTestCase):
for test_url, expected in data:
self.assertEqual(utils.prettify_url(test_url, max_length=32), expected)
+ def test_match_language(self):
+ self.assertEqual(utils.match_language('es', ['es']), 'es')
+ self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
+ self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
+
+ aliases = {'en-GB': 'en-UK', 'he': 'iw'}
+
+ # guess country
+ self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
+ self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
+ self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
+ self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
+ self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
+ self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
+
+ # language aliases
+ self.assertEqual(utils.match_language('iw', ['he']), 'he')
+ self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
+ self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
+ self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
+ self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
+ self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
+ self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
+ self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
+
class TestHTMLTextExtractor(SearxTestCase):