diff options
-rw-r--r-- | searx/tests/engines/test_google.py | 162 | ||||
-rw-r--r-- | searx/tests/test_engines.py | 3 |
2 files changed, 164 insertions, 1 deletion
# -*- coding: utf-8 -*-
"""Unit tests for the Google engine (searx/tests/engines/test_google.py)."""
from collections import defaultdict
import mock
# Import the submodule explicitly: a bare ``import lxml`` does not load
# ``lxml.html``, so ``lxml.html.fromstring`` would only work if some other
# module had already imported it as a side effect.
import lxml.html
from searx.engines import google
from searx.testing import SearxTestCase


class TestGoogleEngine(SearxTestCase):
    """Exercise ``request``, ``response`` and ``parse_images`` of the engine."""

    def test_request(self):
        """The request params carry the query, a PREF cookie and a language."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        dicto['language'] = 'fr_FR'
        params = google.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('google.com', params['url'])
        self.assertIn('PREF', params['cookies'])
        self.assertIn('fr', params['headers']['Accept-Language'])

        # With no specific language selected, English is expected.
        dicto['language'] = 'all'
        params = google.request(query, dicto)
        self.assertIn('en', params['headers']['Accept-Language'])

    def test_response(self):
        """Invalid inputs raise; result and suggestion markup is parsed."""
        # Anything without a ``text`` attribute must be rejected.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, '')
        self.assertRaises(AttributeError, google.response, '[]')

        response = mock.Mock(text='<html></html>')
        self.assertEqual(google.response(response), [])

        # Five ``li.g`` entries and one suggestion paragraph.  Per the
        # assertions below only the first entry is expected to produce a
        # result; the second returned item is the suggestion.
        html = """
        <li class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                    .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    -
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = mock.Mock(text=html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup that contains no ``li.g`` element must yield no results.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = mock.Mock(text=html)
        results = google.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)

    def test_parse_images(self):
        """An image link yields url and img_src with empty title/content."""
        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')


# Companion change in searx/tests/test_engines.py (same change set):
#   - adds   ``from searx.tests.engines.test_google import * # noqa``
#            between the test_github and test_google_images imports;
#   - moves  ``from searx.tests.engines.test_www1x import * # noqa``
#            from that position to between the test_vimeo and
#            test_www500px imports.