summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- searx/engines/subtitleseeker.py 15
-rw-r--r-- searx/tests/engines/test_subtitleseeker.py 169
-rw-r--r-- searx/tests/test_engines.py 1
3 files changed, 178 insertions, 7 deletions
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 9aaf1947b..acefe30ea 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -12,6 +12,7 @@ from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos']
@@ -20,7 +21,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
-search_url = url+'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}&p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
for lc in language_codes
- if lc[0][:2] == resp.search_params['language']][0]
+ if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
# parse results
for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@ def response(resp):
elif search_lang:
href = href + search_lang + '/'
- title = escape(link.xpath(".//text()")[0])
+ title = escape(extract_text(link))
- content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
+ content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
content = content + " - "
- text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
- content = content + html.tostring(text, method='text')
+ text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
+ content = content + text
if result.xpath(".//span") != []:
content = content +\
" - (" +\
- result.xpath(".//span//text()")[0].strip() +\
+ extract_text(result.xpath(".//span")) +\
")"
# append result
diff --git a/searx/tests/engines/test_subtitleseeker.py b/searx/tests/engines/test_subtitleseeker.py
new file mode 100644
index 000000000..a641601b2
--- /dev/null
+++ b/searx/tests/engines/test_subtitleseeker.py
@@ -0,0 +1,169 @@
+from collections import defaultdict
+import mock
+from searx.engines import subtitleseeker
+from searx.testing import SearxTestCase
+
+
+class TestSubtitleseekerEngine(SearxTestCase):
+
+ def test_request(self):
+ query = 'test_query'
+ dicto = defaultdict(dict)
+ dicto['pageno'] = 1
+ params = subtitleseeker.request(query, dicto)
+ self.assertTrue('url' in params)
+ self.assertTrue(query in params['url'])
+ self.assertTrue('subtitleseeker.com' in params['url'])
+
+ def test_response(self):
+ dicto = defaultdict(dict)
+ dicto['language'] = 'fr_FR'
+ response = mock.Mock(search_params=dicto)
+
+ self.assertRaises(AttributeError, subtitleseeker.response, None)
+ self.assertRaises(AttributeError, subtitleseeker.response, [])
+ self.assertRaises(AttributeError, subtitleseeker.response, '')
+ self.assertRaises(AttributeError, subtitleseeker.response, '[]')
+
+ response = mock.Mock(text='<html></html>', search_params=dicto)
+ self.assertEqual(subtitleseeker.response(response), [])
+
+ html = """
+ <div class="boxRows">
+ <div class="boxRowsInner" style="width:600px;">
+ <img src="http://static.subtitleseeker.com/images/movie.gif"
+ style="width:16px; height:16px;" class="icon">
+ <a href="http://this.is.the.url/"
+ class="blue" title="Title subtitle" >
+ This is the Title
+ </a>
+ <br><br>
+ <span class="f10b grey-dark arial" style="padding:0px 0px 5px 20px">
+ "Alternative Title"
+ </span>
+ </div>
+ <div class="boxRowsInner f12b red" style="width:70px;">
+ 1998
+ </div>
+ <div class="boxRowsInner grey-web f12" style="width:120px;">
+ <img src="http://static.subtitleseeker.com/images/basket_put.png"
+ style="width:16px; height:16px;" class="icon">
+ 1039 Subs
+ </div>
+ <div class="boxRowsInner grey-web f10" style="width:130px;">
+ <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
+ style="width:16px; height:16px;" class="icon">
+ 1 hours ago
+ </div>
+ <div class="clear"></div>
+ </div>
+ """
+ response = mock.Mock(text=html, search_params=dicto)
+ results = subtitleseeker.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the Title')
+ self.assertEqual(results[0]['url'], 'http://this.is.the.url/French/')
+ self.assertIn('1998', results[0]['content'])
+ self.assertIn('1039 Subs', results[0]['content'])
+ self.assertIn('Alternative Title', results[0]['content'])
+
+ html = """
+ <div class="boxRows">
+ <div class="boxRowsInner" style="width:600px;">
+ <img src="http://static.subtitleseeker.com/images/movie.gif"
+ style="width:16px; height:16px;" class="icon">
+ <a href="http://this.is.the.url/"
+ class="blue" title="Title subtitle" >
+ This is the Title
+ </a>
+ </div>
+ <div class="boxRowsInner f12b red" style="width:70px;">
+ 1998
+ </div>
+ <div class="boxRowsInner grey-web f12" style="width:120px;">
+ <img src="http://static.subtitleseeker.com/images/basket_put.png"
+ style="width:16px; height:16px;" class="icon">
+ 1039 Subs
+ </div>
+ <div class="boxRowsInner grey-web f10" style="width:130px;">
+ <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
+ style="width:16px; height:16px;" class="icon">
+ 1 hours ago
+ </div>
+ <div class="clear"></div>
+ </div>
+ """
+ dicto['language'] = 'all'
+ response = mock.Mock(text=html, search_params=dicto)
+ results = subtitleseeker.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the Title')
+ self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
+ self.assertIn('1998', results[0]['content'])
+ self.assertIn('1039 Subs', results[0]['content'])
+
+ html = """
+ <div class="boxRows">
+ <div class="boxRowsInner" style="width:600px;">
+ <img src="http://static.subtitleseeker.com/images/movie.gif"
+ style="width:16px; height:16px;" class="icon">
+ <a href="http://this.is.the.url/"
+ class="blue" title="Title subtitle" >
+ This is the Title
+ </a>
+ </div>
+ <div class="boxRowsInner f12b red" style="width:70px;">
+ 1998
+ </div>
+ <div class="boxRowsInner grey-web f12" style="width:120px;">
+ <img src="http://static.subtitleseeker.com/images/basket_put.png"
+ style="width:16px; height:16px;" class="icon">
+ 1039 Subs
+ </div>
+ <div class="boxRowsInner grey-web f10" style="width:130px;">
+ <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
+ style="width:16px; height:16px;" class="icon">
+ 1 hours ago
+ </div>
+ <div class="clear"></div>
+ </div>
+ """
+ subtitleseeker.language = 'English'
+ response = mock.Mock(text=html, search_params=dicto)
+ results = subtitleseeker.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0]['title'], 'This is the Title')
+ self.assertEqual(results[0]['url'], 'http://this.is.the.url/English/')
+ self.assertIn('1998', results[0]['content'])
+ self.assertIn('1039 Subs', results[0]['content'])
+
+ html = """
+ <div class="boxRowsInner" style="width:600px;">
+ <img src="http://static.subtitleseeker.com/images/movie.gif"
+ style="width:16px; height:16px;" class="icon">
+ <a href="http://this.is.the.url/"
+ class="blue" title="Title subtitle" >
+ This is the Title
+ </a>
+ </div>
+ <div class="boxRowsInner f12b red" style="width:70px;">
+ 1998
+ </div>
+ <div class="boxRowsInner grey-web f12" style="width:120px;">
+ <img src="http://static.subtitleseeker.com/images/basket_put.png"
+ style="width:16px; height:16px;" class="icon">
+ 1039 Subs
+ </div>
+ <div class="boxRowsInner grey-web f10" style="width:130px;">
+ <img src="http://static.subtitleseeker.com/images/arrow_refresh_small.png"
+ style="width:16px; height:16px;" class="icon">
+ 1 hours ago
+ </div>
+ """
+ response = mock.Mock(text=html, search_params=dicto)
+ results = subtitleseeker.response(response)
+ self.assertEqual(type(results), list)
+ self.assertEqual(len(results), 0)
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 259ebcf35..7fa1e2b8b 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -23,6 +23,7 @@ from searx.tests.engines.test_searchcode_code import * # noqa
from searx.tests.engines.test_searchcode_doc import * # noqa
from searx.tests.engines.test_soundcloud import * # noqa
from searx.tests.engines.test_stackoverflow import * # noqa
+from searx.tests.engines.test_subtitleseeker import * # noqa
from searx.tests.engines.test_twitter import * # noqa
from searx.tests.engines.test_vimeo import * # noqa
from searx.tests.engines.test_www500px import * # noqa