diff options
author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-01-31 17:29:22 +0100 |
---|---|---|
committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-01-31 17:29:22 +0100 |
commit | d20ddf9da147647710127385a3ee95ff273d4fea (patch) | |
tree | e9607635f06d1a0d01585a58e9a9132526cb5c43 | |
parent | 787fee6a09f5569f67e7bddaf73d52e159c0431c (diff) | |
download | searxng-d20ddf9da147647710127385a3ee95ff273d4fea.tar.gz searxng-d20ddf9da147647710127385a3ee95ff273d4fea.zip |
Stackoverflow's unit test
-rw-r--r-- | searx/engines/stackoverflow.py | 8 | ||||
-rw-r--r-- | searx/tests/engines/test_stackoverflow.py | 106 | ||||
-rw-r--r-- | searx/tests/test_engines.py | 1 |
3 files changed, 111 insertions, 4 deletions
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index dcbb1890c..78dba9f68 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -12,6 +12,7 @@ from urlparse import urljoin from cgi import escape from urllib import urlencode from lxml import html +from searx.engines.xpath import extract_text # engine dependent config categories = ['it'] @@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}' # specific xpath variables results_xpath = '//div[contains(@class,"question-summary")]' link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a' -title_xpath = './/text()' -content_xpath = './/div[@class="excerpt"]//text()' +content_xpath = './/div[@class="excerpt"]' # do search-request @@ -46,8 +46,8 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(link_xpath)[0] href = urljoin(url, link.attrib.get('href')) - title = escape(' '.join(link.xpath(title_xpath))) - content = escape(' '.join(result.xpath(content_xpath))) + title = escape(extract_text(link)) + content = escape(extract_text(result.xpath(content_xpath))) # append result results.append({'url': href, diff --git a/searx/tests/engines/test_stackoverflow.py b/searx/tests/engines/test_stackoverflow.py new file mode 100644 index 000000000..e69bafb4c --- /dev/null +++ b/searx/tests/engines/test_stackoverflow.py @@ -0,0 +1,106 @@ +from collections import defaultdict +import mock +from searx.engines import stackoverflow +from searx.testing import SearxTestCase + + +class TestStackoverflowEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = stackoverflow.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('stackoverflow.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, stackoverflow.response, None) + self.assertRaises(AttributeError, stackoverflow.response, []) + self.assertRaises(AttributeError, stackoverflow.response, '') + self.assertRaises(AttributeError, stackoverflow.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(stackoverflow.response(response), []) + + html = """ + <div class="question-summary search-result" id="answer-id-1783426"> + <div class="statscontainer"> + <div class="statsarrow"></div> + <div class="stats"> + <div class="vote"> + <div class="votes answered"> + <span class="vote-count-post "><strong>2583</strong></span> + <div class="viewcount">votes</div> + </div> + </div> + </div> + </div> + <div class="summary"> + <div class="result-link"> + <span> + <a href="/questions/this.is.the.url" + data-searchsession="/questions" + title="Checkout remote Git branch"> + This is the title + </a> + </span> + </div> + <div class="excerpt"> + This is the content + </div> + <div class="tags user-tags t-git t-git-checkout t-remote-branch"> + </div> + <div class="started fr"> + answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by + <a href="/users/214090/hallski">hallski</a> + </div> + </div> + </div> + """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + + html = """ + <div class="statscontainer"> + <div class="statsarrow"></div> + <div class="stats"> + <div class="vote"> + <div class="votes answered"> + <span class="vote-count-post "><strong>2583</strong></span> + <div class="viewcount">votes</div> + </div> + </div> + </div> + </div> + <div class="summary"> + <div class="result-link"> + <span> + <a href="/questions/this.is.the.url" + data-searchsession="/questions" + title="Checkout remote Git branch"> + This is the title + </a> + </span> + </div> + <div class="excerpt"> + This is the content + </div> + <div class="tags user-tags t-git t-git-checkout t-remote-branch"> + </div> + <div class="started fr"> + answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by + <a href="/users/214090/hallski">hallski</a> + </div> + </div> + """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 4ed1a9bba..31ad9cd4e 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -16,4 +16,5 @@ from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa +from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_youtube import * # noqa |