summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-31 17:29:22 +0100
committer: Cqoicebordel <Cqoicebordel@users.noreply.github.com> 2015-01-31 17:29:22 +0100
commit: d20ddf9da147647710127385a3ee95ff273d4fea (patch)
tree: e9607635f06d1a0d01585a58e9a9132526cb5c43
parent: 787fee6a09f5569f67e7bddaf73d52e159c0431c (diff)
download: searxng-d20ddf9da147647710127385a3ee95ff273d4fea.tar.gz
download: searxng-d20ddf9da147647710127385a3ee95ff273d4fea.zip
Stackoverflow's unit test
-rw-r--r-- searx/engines/stackoverflow.py 8
-rw-r--r-- searx/tests/engines/test_stackoverflow.py 106
-rw-r--r-- searx/tests/test_engines.py 1
3 files changed, 111 insertions, 4 deletions
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index dcbb1890c..78dba9f68 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -12,6 +12,7 @@ from urlparse import urljoin
from cgi import escape
from urllib import urlencode
from lxml import html
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['it']
@@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}'
# specific xpath variables
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
-title_xpath = './/text()'
-content_xpath = './/div[@class="excerpt"]//text()'
+content_xpath = './/div[@class="excerpt"]'
# do search-request
@@ -46,8 +46,8 @@ def response(resp):
for result in dom.xpath(results_xpath):
link = result.xpath(link_xpath)[0]
href = urljoin(url, link.attrib.get('href'))
- title = escape(' '.join(link.xpath(title_xpath)))
- content = escape(' '.join(result.xpath(content_xpath)))
+ title = escape(extract_text(link))
+ content = escape(extract_text(result.xpath(content_xpath)))
# append result
results.append({'url': href,
diff --git a/searx/tests/engines/test_stackoverflow.py b/searx/tests/engines/test_stackoverflow.py
new file mode 100644
index 000000000..e69bafb4c
--- /dev/null
+++ b/searx/tests/engines/test_stackoverflow.py
@@ -0,0 +1,106 @@
+from collections import defaultdict
+import mock
+from searx.engines import stackoverflow
+from searx.testing import SearxTestCase
+
+
+class TestStackoverflowEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 0
+ params = stackoverflow.request(query, dicto)
+ self.assertTrue('url' in params)
+ self.assertTrue(query in params['url'])
+ self.assertTrue('stackoverflow.com' in params['url'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, stackoverflow.response, None)
+ self.assertRaises(AttributeError, stackoverflow.response, [])
+ self.assertRaises(AttributeError, stackoverflow.response, '')
+ self.assertRaises(AttributeError, stackoverflow.response, '[]')
+
+ response = mock.Mock(text='<html></html>')
+ self.assertEqual(stackoverflow.response(response), [])
+
+ html = """
+ <div class="question-summary search-result" id="answer-id-1783426">
+ <div class="statscontainer">
+ <div class="statsarrow"></div>
+ <div class="stats">
+ <div class="vote">
+ <div class="votes answered">
+ <span class="vote-count-post "><strong>2583</strong></span>
+ <div class="viewcount">votes</div>
+ </div>
+ </div>
+ </div>
+ </div>
+ <div class="summary">
+ <div class="result-link">
+ <span>
+ <a href="/questions/this.is.the.url"
+ data-searchsession="/questions"
+ title="Checkout remote Git branch">
+ This is the title
+ </a>
+ </span>
+ </div>
+ <div class="excerpt">
+ This is the content
+ </div>
+ <div class="tags user-tags t-git t-git-checkout t-remote-branch">
+ </div>
+ <div class="started fr">
+ answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
+ <a href="/users/214090/hallski">hallski</a>
+ </div>
+ </div>
+ </div>
+ """
+ response = mock.Mock(text=html)
+ results = stackoverflow.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the title')
+ self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url')
+ self.assertEqual(results[0]['content'], 'This is the content')
+
+ html = """
+ <div class="statscontainer">
+ <div class="statsarrow"></div>
+ <div class="stats">
+ <div class="vote">
+ <div class="votes answered">
+ <span class="vote-count-post "><strong>2583</strong></span>
+ <div class="viewcount">votes</div>
+ </div>
+ </div>
+ </div>
+ </div>
+ <div class="summary">
+ <div class="result-link">
+ <span>
+ <a href="/questions/this.is.the.url"
+ data-searchsession="/questions"
+ title="Checkout remote Git branch">
+ This is the title
+ </a>
+ </span>
+ </div>
+ <div class="excerpt">
+ This is the content
+ </div>
+ <div class="tags user-tags t-git t-git-checkout t-remote-branch">
+ </div>
+ <div class="started fr">
+ answered <span title="2009-11-23 14:26:08Z" class="relativetime">nov 23 '09</span> by
+ <a href="/users/214090/hallski">hallski</a>
+ </div>
+ </div>
+ """
+ response = mock.Mock(text=html)
+ results = stackoverflow.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 4ed1a9bba..31ad9cd4e 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -16,4 +16,5 @@ from searx.tests.engines.test_mixcloud import * # noqa
from searx.tests.engines.test_searchcode_code import * # noqa
from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa
+from searx.tests.engines.test_stackoverflow import * # noqa
from searx.tests.engines.test_youtube import * # noqa