diff options
author | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-06 16:39:59 +0100 |
---|---|---|
committer | Cqoicebordel <Cqoicebordel@users.noreply.github.com> | 2015-02-06 16:39:59 +0100 |
commit | 3a4d6045c1da950d13d1d14192247389c5932631 (patch) | |
tree | 686b41000bafd692bb7e96595a80e40f4199e563 /searx/engines/subtitleseeker.py | |
parent | 9f13af8d3c96741e69f50b23abca9bd5a5e19998 (diff) | |
download | searxng-3a4d6045c1da950d13d1d14192247389c5932631.tar.gz searxng-3a4d6045c1da950d13d1d14192247389c5932631.zip |
Subtitleseeker's unit test
Diffstat (limited to 'searx/engines/subtitleseeker.py')
-rw-r--r-- | searx/engines/subtitleseeker.py | 15 |
1 files changed, 8 insertions, 7 deletions
```diff
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 9aaf1947b..acefe30ea 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -12,6 +12,7 @@ from cgi import escape
 from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['videos']
@@ -20,7 +21,7 @@ language = ""
 
 # search-url
 url = 'http://www.subtitleseeker.com/'
-search_url = url+'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}&p={pageno}'
 
 # specific xpath variables
 results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
 
     if resp.search_params['language'] != 'all':
         search_lang = [lc[1] for lc in language_codes
-                       if lc[0][:2] == resp.search_params['language']][0]
+                       if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
 
     # parse results
     for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@ def response(resp):
         elif search_lang:
             href = href + search_lang + '/'
 
-        title = escape(link.xpath(".//text()")[0])
+        title = escape(extract_text(link))
 
-        content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
+        content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
         content = content + " - "
-        text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
-        content = content + html.tostring(text, method='text')
+        text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
+        content = content + text
 
         if result.xpath(".//span") != []:
             content = content +\
                 " - (" +\
-                result.xpath(".//span//text()")[0].strip() +\
+                extract_text(result.xpath(".//span")) +\
                 ")"
 
         # append result
```