diff options
author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-02 17:55:39 +0100 |
---|---|---|
committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-02 17:55:39 +0100 |
commit | efe6dead5566d4800587491e5252474a33ddff60 (patch) | |
tree | 34d39018c885ccf2a91a3bf73a79a29759eb8fbe | |
parent | 0e6f8393ab8b29b2e85d1fafdc7442455767f753 (diff) | |
download | searxng-efe6dead5566d4800587491e5252474a33ddff60.tar.gz searxng-efe6dead5566d4800587491e5252474a33ddff60.zip |
Duckduckgo unit test
-rw-r--r-- | searx/engines/duckduckgo.py | 10 | ||||
-rw-r--r-- | searx/tests/engines/test_duckduckgo.py | 90 | ||||
-rw-r--r-- | searx/tests/test_engines.py | 1 |
3 files changed, 96 insertions, 5 deletions
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 583e33f73..e35a6334c 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,7 +15,7 @@ from urllib import urlencode from lxml.html import fromstring -from searx.utils import html_to_text +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}' # specific xpath variables result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa url_xpath = './/a[@class="large"]/@href' -title_xpath = './/a[@class="large"]//text()' -content_xpath = './/div[@class="snippet"]//text()' +title_xpath = './/a[@class="large"]' +content_xpath = './/div[@class="snippet"]' # do search-request @@ -64,8 +64,8 @@ def response(resp): if not res_url: continue - title = html_to_text(''.join(r.xpath(title_xpath))) - content = html_to_text(''.join(r.xpath(content_xpath))) + title = extract_text(r.xpath(title_xpath)) + content = extract_text(r.xpath(content_xpath)) # append result results.append({'title': title, diff --git a/searx/tests/engines/test_duckduckgo.py b/searx/tests/engines/test_duckduckgo.py new file mode 100644 index 000000000..8ff0fb7f5 --- /dev/null +++ b/searx/tests/engines/test_duckduckgo.py @@ -0,0 +1,90 @@ +from collections import defaultdict +import mock +from searx.engines import duckduckgo +from searx.testing import SearxTestCase + + +class TestBingEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = duckduckgo.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('duckduckgo.com', params['url']) + self.assertIn('fr-fr', params['url']) + + dicto['language'] = 'all' + params = duckduckgo.request(query, dicto) + self.assertIn('en-us', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, duckduckgo.response, None) + self.assertRaises(AttributeError, duckduckgo.response, []) + self.assertRaises(AttributeError, duckduckgo.response, '') + self.assertRaises(AttributeError, duckduckgo.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(duckduckgo.response(response), []) + + html = """ + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + <a rel="nofollow" href="https://www.test.com/"> + <img width="16" height="16" alt="" + src="/i/www.test.com.ico" style="visibility: visible;" name="i15" /> + </a> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <a rel="nofollow" class="large" href="http://this.should.be.the.link/"> + This <b>is</b> <b>the</b> title + </a> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = duckduckgo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') + self.assertEqual(results[0]['content'], 'This should be the content.') + + html = """ + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + <div class="results_links results_links_deep web-result"> + <div class="icon_fav" style="display: block;"> + <img width="16" height="16" alt="" + src="/i/www.test.com.ico" style="visibility: visible;" name="i15" /> + </div> + <div class="links_main links_deep"> <!-- This is the visible part --> + <a rel="nofollow" class="large" href=""> + This <b>is</b> <b>the</b> title + </a> + <div class="snippet"><b>This</b> should be the content.</div> + <div class="url"> + http://this.should.be.the.link/ + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = duckduckgo.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index ff8185b1e..13fa753aa 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -6,6 +6,7 @@ from searx.tests.engines.test_dailymotion import * # noqa from searx.tests.engines.test_deezer import * # noqa from searx.tests.engines.test_deviantart import * # noqa from searx.tests.engines.test_digg import * # noqa +from searx.tests.engines.test_duckduckgo import * # noqa from searx.tests.engines.test_dummy import * # noqa from searx.tests.engines.test_flickr import * # noqa from searx.tests.engines.test_flickr_noapi import * # noqa |