diff options
author | a01200356 <a01200356@itesm.mx> | 2016-05-19 00:38:43 -0500 |
---|---|---|
committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2020-10-25 17:59:05 -0700 |
commit | c3daa08537668c24224fffecbed4347fee936fcf (patch) | |
tree | 23cfde77015e13e8687bf08bee9d5a4271b7af7f /tests | |
parent | 0a44fa8bb7eca0d81f0ebdab37b9845b260473ad (diff) | |
download | searxng-c3daa08537668c24224fffecbed4347fee936fcf.tar.gz searxng-c3daa08537668c24224fffecbed4347fee936fcf.zip |
[enh] Add onions category with Ahmia, Not Evil and Torch
Xpath engine and results template changed to account for the fact that
archive.org doesn't cache .onions, though some onion engines might have
their own cache.
Disabled by default. Can be enabled by setting the SOCKS proxies to
wherever Tor is listening and setting using_tor_proxy as True.
Requires Tor and updating packages.
To avoid manually adding the timeout on each engine, you can set
extra_proxy_timeout to account for Tor's (or whatever proxy is used) extra
time.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unit/engines/test_xpath.py | 121 | ||||
-rw-r--r-- | tests/unit/test_engines_init.py | 44 |
2 files changed, 165 insertions, 0 deletions
diff --git a/tests/unit/engines/test_xpath.py b/tests/unit/engines/test_xpath.py new file mode 100644 index 000000000..963a44a25 --- /dev/null +++ b/tests/unit/engines/test_xpath.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import xpath +from searx.testing import SearxTestCase + + +class TestXpathEngine(SearxTestCase): + + def test_request(self): + xpath.search_url = 'https://url.com/{query}' + xpath.categories = [] + xpath.paging = False + query = 'test_query' + dicto = defaultdict(dict) + params = xpath.request(query, dicto) + self.assertIn('url', params) + self.assertEquals('https://url.com/test_query', params['url']) + + xpath.search_url = 'https://url.com/q={query}&p={pageno}' + xpath.paging = True + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = xpath.request(query, dicto) + self.assertIn('url', params) + self.assertEquals('https://url.com/q=test_query&p=1', params['url']) + + def test_response(self): + # without results_xpath + xpath.url_xpath = '//div[@class="search_result"]//a[@class="result"]/@href' + xpath.title_xpath = '//div[@class="search_result"]//a[@class="result"]' + xpath.content_xpath = '//div[@class="search_result"]//p[@class="content"]' + + self.assertRaises(AttributeError, xpath.response, None) + self.assertRaises(AttributeError, xpath.response, []) + self.assertRaises(AttributeError, xpath.response, '') + self.assertRaises(AttributeError, xpath.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(xpath.response(response), []) + + html = u""" + <div> + <div class="search_result"> + <a class="result" href="https://result1.com">Result 1</a> + <p class="content">Content 1</p> + <a class="cached" href="https://cachedresult1.com">Cache</a> + </div> + <div class="search_result"> + <a class="result" href="https://result2.com">Result 2</a> + <p class="content">Content 2</p> + <a class="cached" 
href="https://cachedresult2.com">Cache</a> + </div> + </div> + """ + response = mock.Mock(text=html) + results = xpath.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'Result 1') + self.assertEqual(results[0]['url'], 'https://result1.com/') + self.assertEqual(results[0]['content'], 'Content 1') + self.assertEqual(results[1]['title'], 'Result 2') + self.assertEqual(results[1]['url'], 'https://result2.com/') + self.assertEqual(results[1]['content'], 'Content 2') + + # with cached urls, without results_xpath + xpath.cached_xpath = '//div[@class="search_result"]//a[@class="cached"]/@href' + results = xpath.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com') + self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com') + self.assertFalse(results[0].get('is_onion', False)) + + # results are onion urls (no results_xpath) + xpath.categories = ['onions'] + results = xpath.response(response) + self.assertTrue(results[0]['is_onion']) + + # with results_xpath + xpath.results_xpath = '//div[@class="search_result"]' + xpath.url_xpath = './/a[@class="result"]/@href' + xpath.title_xpath = './/a[@class="result"]' + xpath.content_xpath = './/p[@class="content"]' + xpath.cached_xpath = None + xpath.categories = [] + + self.assertRaises(AttributeError, xpath.response, None) + self.assertRaises(AttributeError, xpath.response, []) + self.assertRaises(AttributeError, xpath.response, '') + self.assertRaises(AttributeError, xpath.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(xpath.response(response), []) + + response = mock.Mock(text=html) + results = xpath.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['title'], 'Result 1') + self.assertEqual(results[0]['url'], 
'https://result1.com/') + self.assertEqual(results[0]['content'], 'Content 1') + self.assertEqual(results[1]['title'], 'Result 2') + self.assertEqual(results[1]['url'], 'https://result2.com/') + self.assertEqual(results[1]['content'], 'Content 2') + + # with cached urls, with results_xpath + xpath.cached_xpath = './/a[@class="cached"]/@href' + results = xpath.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 2) + self.assertEqual(results[0]['cached_url'], 'https://cachedresult1.com') + self.assertEqual(results[1]['cached_url'], 'https://cachedresult2.com') + self.assertFalse(results[0].get('is_onion', False)) + + # results are onion urls (with results_xpath) + xpath.categories = ['onions'] + results = xpath.response(response) + self.assertTrue(results[0]['is_onion']) diff --git a/tests/unit/test_engines_init.py b/tests/unit/test_engines_init.py new file mode 100644 index 000000000..cf4d50309 --- /dev/null +++ b/tests/unit/test_engines_init.py @@ -0,0 +1,44 @@ +from searx.testing import SearxTestCase +from searx import settings, engines + + +class TestEnginesInit(SearxTestCase): + + @classmethod + def tearDownClass(cls): + settings['outgoing']['using_tor_proxy'] = False + settings['outgoing']['extra_proxy_timeout'] = 0 + + def test_initialize_engines_default(self): + engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2'}] + + engines.initialize_engines(engine_list) + self.assertEqual(len(engines.engines), 2) + self.assertIn('engine1', engines.engines) + self.assertIn('engine2', engines.engines) + + def test_initialize_engines_exclude_onions(self): + settings['outgoing']['using_tor_proxy'] = False + engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + + engines.initialize_engines(engine_list) + 
self.assertEqual(len(engines.engines), 1) + self.assertIn('engine1', engines.engines) + self.assertNotIn('onions', engines.categories) + + def test_initialize_engines_include_onions(self): + settings['outgoing']['using_tor_proxy'] = True + settings['outgoing']['extra_proxy_timeout'] = 100.0 + engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1', 'categories': 'general', + 'timeout': 20.0, 'onion_url': 'http://engine1.onion'}, + {'engine': 'dummy', 'name': 'engine2', 'shortcut': 'e2', 'categories': 'onions'}] + + engines.initialize_engines(engine_list) + self.assertEqual(len(engines.engines), 2) + self.assertIn('engine1', engines.engines) + self.assertIn('engine2', engines.engines) + self.assertIn('onions', engines.categories) + self.assertIn('http://engine1.onion', engines.engines['engine1'].search_url) + self.assertEqual(engines.engines['engine1'].timeout, 120.0) |