diff options
-rw-r--r-- | searx/engines/subtitleseeker.py | 15 | ||||
-rw-r--r-- | searx/tests/engines/test_subtitleseeker.py | 169 | ||||
-rw-r--r-- | searx/tests/test_engines.py | 1 |
3 files changed, 178 insertions, 7 deletions
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 9aaf1947b..acefe30ea 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -12,6 +12,7 @@ from cgi import escape from urllib import quote_plus from lxml import html from searx.languages import language_codes +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos'] @@ -20,7 +21,7 @@ language = "" # search-url url = 'http://www.subtitleseeker.com/' -search_url = url+'search/TITLES/{query}&p={pageno}' +search_url = url + 'search/TITLES/{query}&p={pageno}' # specific xpath variables results_xpath = '//div[@class="boxRows"]' @@ -44,7 +45,7 @@ def response(resp): if resp.search_params['language'] != 'all': search_lang = [lc[1] for lc in language_codes - if lc[0][:2] == resp.search_params['language']][0] + if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] # parse results for result in dom.xpath(results_xpath): @@ -56,17 +57,17 @@ def response(resp): elif search_lang: href = href + search_lang + '/' - title = escape(link.xpath(".//text()")[0]) + title = escape(extract_text(link)) - content = result.xpath('.//div[contains(@class,"red")]//text()')[0] + content = extract_text(result.xpath('.//div[contains(@class,"red")]')) content = content + " - " - text = result.xpath('.//div[contains(@class,"grey-web")]')[0] - content = content + html.tostring(text, method='text') + text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0]) + content = content + text if result.xpath(".//span") != []: content = content +\ " - (" +\ - result.xpath(".//span//text()")[0].strip() +\ + extract_text(result.xpath(".//span")) +\ ")" # append result diff --git a/searx/tests/engines/test_subtitleseeker.py b/searx/tests/engines/test_subtitleseeker.py new file mode 100644 index 000000000..a641601b2 --- /dev/null +++ b/searx/tests/engines/test_subtitleseeker.py @@ -0,0 +1,169 @@ +from collections import defaultdict +import mock +from searx.engines import subtitleseeker +from searx.testing import SearxTestCase + + +class TestSubtitleseekerEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = subtitleseeker.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('subtitleseeker.com' in params['url']) + + def test_response(self): + dicto = defaultdict(dict) + dicto['language'] = 'fr_FR' + response = mock.Mock(search_params=dicto) + + self.assertRaises(AttributeError, subtitleseeker.response, None) + self.assertRaises(AttributeError, subtitleseeker.response, []) + self.assertRaises(AttributeError, subtitleseeker.response, '') + self.assertRaises(AttributeError, subtitleseeker.response, '[]') + + response = mock.Mock(text='<html></html>', search_params=dicto) + self.assertEqual(subtitleseeker.response(response), []) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + <br><br> + <span class="f10b grey-dark arial" style="padding:0px 0px 5px 20px"> + "Alternative Title" + </span> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/French/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + self.assertIn('Alternative Title', results[0]['content']) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + dicto['language'] = 'all' + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + + html = """ + <div class="boxRows"> + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + <div class="clear"></div> + </div> + """ + subtitleseeker.language = 'English' + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the Title') + self.assertEqual(results[0]['url'], 'http://this.is.the.url/English/') + self.assertIn('1998', results[0]['content']) + self.assertIn('1039 Subs', results[0]['content']) + + html = """ + <div class="boxRowsInner" style="width:600px;"> + <img src="http://static.subtitleseeker.com/images/movie.gif" + style="width:16px; height:16px;" class="icon"> + <a href="http://this.is.the.url/" + class="blue" title="Title subtitle" > + This is the Title + </a> + </div> + <div class="boxRowsInner f12b red" style="width:70px;"> + 1998 + </div> + <div class="boxRowsInner grey-web f12" style="width:120px;"> + <img src="http://static.subtitleseeker.com/images/basket_put.png" + style="width:16px; height:16px;" class="icon"> + 1039 Subs + </div> + <div class="boxRowsInner grey-web f10" style="width:130px;"> + <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png" + style="width:16px; height:16px;" class="icon"> + 1 hours ago + </div> + """ + response = mock.Mock(text=html, search_params=dicto) + results = subtitleseeker.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 259ebcf35..7fa1e2b8b 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa from searx.tests.engines.test_stackoverflow import * # noqa +from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa from searx.tests.engines.test_www500px import * # noqa |