summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rw-r--r-- searx/engines/bing.py 9
-rw-r--r-- searx/tests/engines/test_bing.py 90
-rw-r--r-- searx/tests/test_engines.py 1
4 files changed, 97 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
index 08cf582aa..3268f8320 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ local/
parts/
searx.egg-info/
var/
+node_modules/
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 5de461cfe..f9c323d05 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -14,6 +14,7 @@
from urllib import urlencode
from cgi import escape
from lxml import html
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -55,8 +56,8 @@ def response(resp):
for result in dom.xpath('//div[@class="sa_cc"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
- title = ' '.join(link.xpath('.//text()'))
- content = escape(' '.join(result.xpath('.//p//text()')))
+ title = extract_text(link)
+ content = escape(extract_text(result.xpath('.//p')))
# append result
results.append({'url': url,
@@ -71,8 +72,8 @@ def response(resp):
for result in dom.xpath('//li[@class="b_algo"]'):
link = result.xpath('.//h2/a')[0]
url = link.attrib.get('href')
- title = ' '.join(link.xpath('.//text()'))
- content = escape(' '.join(result.xpath('.//p//text()')))
+ title = extract_text(link)
+ content = escape(extract_text(result.xpath('.//p')))
# append result
results.append({'url': url,
diff --git a/searx/tests/engines/test_bing.py b/searx/tests/engines/test_bing.py
new file mode 100644
index 000000000..52a049f01
--- /dev/null
+++ b/searx/tests/engines/test_bing.py
@@ -0,0 +1,90 @@
+from collections import defaultdict
+import mock
+from searx.engines import bing
+from searx.testing import SearxTestCase
+
+
+class TestBingEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 0
+ dicto['language'] = 'fr_FR'
+ params = bing.request(query, dicto)
+ self.assertTrue('url' in params)
+ self.assertTrue(query in params['url'])
+ self.assertTrue('bing.com' in params['url'])
+ self.assertTrue('SRCHHPGUSR' in params['cookies'])
+ self.assertTrue('fr' in params['cookies']['SRCHHPGUSR'])
+
+ dicto['language'] = 'all'
+ params = bing.request(query, dicto)
+ self.assertTrue('SRCHHPGUSR' in params['cookies'])
+ self.assertTrue('en' in params['cookies']['SRCHHPGUSR'])
+
+ def test_response(self):
+ self.assertRaises(AttributeError, bing.response, None)
+ self.assertRaises(AttributeError, bing.response, [])
+ self.assertRaises(AttributeError, bing.response, '')
+ self.assertRaises(AttributeError, bing.response, '[]')
+
+ response = mock.Mock(content='<html></html>')
+ self.assertEqual(bing.response(response), [])
+
+ response = mock.Mock(content='<html></html>')
+ self.assertEqual(bing.response(response), [])
+
+ html = """
+ <div class="sa_cc" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
+ <div Class="sa_mc">
+ <div class="sb_tlst">
+ <h3>
+ <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
+ <strong>This</strong> should be the title</a>
+ </h3>
+ </div>
+ <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
+ <span class="c_tlbxTrg">
+ <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
+ </span>
+ </span>
+ </div>
+ <p><strong>This</strong> should be the content.</p>
+ </div>
+ </div>
+ """
+ response = mock.Mock(content=html)
+ results = bing.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This should be the title')
+ self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+ self.assertEqual(results[0]['content'], 'This should be the content.')
+
+ html = """
+ <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
+ <div Class="sa_mc">
+ <div class="sb_tlst">
+ <h2>
+ <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
+ <strong>This</strong> should be the title</a>
+ </h2>
+ </div>
+ <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
+ <span class="c_tlbxTrg">
+ <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
+ </span>
+ </span>
+ </div>
+ <p><strong>This</strong> should be the content.</p>
+ </div>
+ </li>
+ """
+ response = mock.Mock(content=html)
+ results = bing.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This should be the title')
+ self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+ self.assertEqual(results[0]['content'], 'This should be the content.')
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 1ffdbe529..970131b48 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -1,2 +1,3 @@
+from searx.tests.engines.test_bing import * # noqa
from searx.tests.engines.test_dummy import * # noqa
from searx.tests.engines.test_github import * # noqa