author     Adam Tauber <asciimoo@gmail.com>  2015-02-12 10:52:55 +0100
committer  Adam Tauber <asciimoo@gmail.com>  2015-02-12 10:52:55 +0100
commit     f6db77d81ea87d99462b4c3cc40a8a27e0264724 (patch)
tree       b26fb71a62082aeec81c7bb1bb3d7447d006aed3 /searx/engines
parent     516105c570a920dadeb87b34ee5ee434ad5cb16f (diff)
parent     f96154b7c454a3b02bf688f248b4471c2020c28f (diff)
Merge pull request #210 from Cqoicebordel/unit-tests
unit tests
Diffstat (limited to 'searx/engines')
-rw-r--r--  searx/engines/currency_convert.py         9
-rw-r--r--  searx/engines/duckduckgo.py              10
-rw-r--r--  searx/engines/duckduckgo_definitions.py   5
-rw-r--r--  searx/engines/faroo.py                    8
-rw-r--r--  searx/engines/openstreetmap.py            9
-rw-r--r--  searx/engines/photon.py                   2
-rw-r--r--  searx/engines/startpage.py               13
-rw-r--r--  searx/engines/subtitleseeker.py          15
-rw-r--r--  searx/engines/twitter.py                 13
-rw-r--r--  searx/engines/yacy.py                    19
-rw-r--r--  searx/engines/yahoo.py                    4
11 files changed, 54 insertions(+), 53 deletions(-)
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index d8841c1d1..4618c82b1 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -13,12 +13,9 @@ def request(query, params):
if not m:
# wrong query
return params
- try:
- ammount, from_currency, to_currency = m.groups()
- ammount = float(ammount)
- except:
- # wrong params
- return params
+
+ ammount, from_currency, to_currency = m.groups()
+ ammount = float(ammount)
q = (from_currency + to_currency).upper()
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 583e33f73..e35a6334c 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -15,7 +15,7 @@
from urllib import urlencode
from lxml.html import fromstring
-from searx.utils import html_to_text
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa
url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]//text()'
-content_xpath = './/div[@class="snippet"]//text()'
+title_xpath = './/a[@class="large"]'
+content_xpath = './/div[@class="snippet"]'
# do search-request
@@ -64,8 +64,8 @@ def response(resp):
if not res_url:
continue
- title = html_to_text(''.join(r.xpath(title_xpath)))
- content = html_to_text(''.join(r.xpath(content_xpath)))
+ title = extract_text(r.xpath(title_xpath))
+ content = extract_text(r.xpath(content_xpath))
# append result
results.append({'title': title,
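
Note on the hunk above: duckduckgo.py (and, further down, startpage.py, subtitleseeker.py and twitter.py) switches from joining raw //text() nodes through html_to_text to passing element nodes to extract_text, imported from searx/engines/xpath.py. Below is a minimal sketch of what such a helper is assumed to do; the function name and behaviour here are illustrative, the real implementation may differ.

# Illustrative sketch only -- not the actual searx.engines.xpath.extract_text.
# Assumption: the helper flattens an XPath result (a bare element, or a list of
# elements and text nodes) into one whitespace-trimmed string.
from lxml import html

def extract_text_sketch(xpath_results):
    if not isinstance(xpath_results, list):
        xpath_results = [xpath_results]
    parts = []
    for node in xpath_results:
        # lxml elements expose text_content(); plain text nodes are used as-is
        text = node.text_content() if hasattr(node, 'text_content') else node
        parts.append(text.strip())
    return ' '.join(p for p in parts if p)

dom = html.fromstring('<a class="large">Example <b>title</b></a>')
print(extract_text_sketch(dom.xpath('//a[@class="large"]')))  # Example title
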
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index b66d6c0f2..793e97d22 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -25,9 +25,10 @@ def request(query, params):
def response(resp):
- search_res = json.loads(resp.text)
results = []
+ search_res = json.loads(resp.text)
+
content = ''
heading = search_res.get('Heading', '')
attributes = []
@@ -68,7 +69,7 @@ def response(resp):
results.append({'title': heading, 'url': firstURL})
# related topics
- for ddg_result in search_res.get('RelatedTopics', None):
+ for ddg_result in search_res.get('RelatedTopics', []):
if 'FirstURL' in ddg_result:
suggestion = result_to_text(ddg_result.get('FirstURL', None),
ddg_result.get('Text', None),
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
index 5360ea156..4a5e60a60 100644
--- a/searx/engines/faroo.py
+++ b/searx/engines/faroo.py
@@ -37,7 +37,7 @@ search_category = {'general': 'web',
# do search-request
def request(query, params):
- offset = (params['pageno']-1) * number_of_results + 1
+ offset = (params['pageno'] - 1) * number_of_results + 1
categorie = search_category.get(params['category'], 'web')
if params['language'] == 'all':
@@ -45,11 +45,11 @@ def request(query, params):
else:
language = params['language'].split('_')[0]
- # skip, if language is not supported
+ # if language is not supported, put it in english
if language != 'en' and\
language != 'de' and\
language != 'zh':
- return params
+ language = 'en'
params['url'] = search_url.format(offset=offset,
number_of_results=number_of_results,
@@ -69,12 +69,10 @@ def response(resp):
# HTTP-Code 401: api-key is not valide
if resp.status_code == 401:
raise Exception("API key is not valide")
- return []
# HTTP-Code 429: rate limit exceeded
if resp.status_code == 429:
raise Exception("rate limit has been exceeded!")
- return []
results = []
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 68446ef5f..60c3c13ca 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -38,6 +38,9 @@ def response(resp):
# parse results
for r in json:
+ if 'display_name' not in r:
+ continue
+
title = r['display_name']
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
@@ -49,10 +52,8 @@ def response(resp):
geojson = r.get('geojson')
# if no geojson is found and osm_type is a node, add geojson Point
- if not geojson and\
- osm_type == 'node':
- geojson = {u'type': u'Point',
- u'coordinates': [r['lon'], r['lat']]}
+ if not geojson and osm_type == 'node':
+ geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address')
address = {}
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 16340d24a..a9c558c4b 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -61,7 +61,7 @@ def response(resp):
continue
# get title
- title = properties['name']
+ title = properties.get('name')
# get osm-type
if properties.get('osm_type') == 'N':
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index d60ecd978..9d5b4befe 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -13,6 +13,7 @@
from lxml import html
from cgi import escape
import re
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['general']
@@ -45,8 +46,7 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
- params['data']['with_language'] = ('lang_' +
- params['language'].split('_')[0])
+ params['data']['with_language'] = ('lang_' + params['language'].split('_')[0])
return params
@@ -64,18 +64,15 @@ def response(resp):
continue
link = links[0]
url = link.attrib.get('href')
- try:
- title = escape(link.text_content())
- except UnicodeDecodeError:
- continue
# block google-ad url's
if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
continue
+ title = escape(extract_text(link))
+
if result.xpath('./p[@class="desc"]'):
- content = escape(result.xpath('./p[@class="desc"]')[0]
- .text_content())
+ content = escape(extract_text(result.xpath('./p[@class="desc"]')))
else:
content = ''
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 9aaf1947b..acefe30ea 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -12,6 +12,7 @@ from cgi import escape
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['videos']
@@ -20,7 +21,7 @@ language = ""
# search-url
url = 'http://www.subtitleseeker.com/'
-search_url = url+'search/TITLES/{query}&p={pageno}'
+search_url = url + 'search/TITLES/{query}&p={pageno}'
# specific xpath variables
results_xpath = '//div[@class="boxRows"]'
@@ -44,7 +45,7 @@ def response(resp):
if resp.search_params['language'] != 'all':
search_lang = [lc[1]
for lc in language_codes
- if lc[0][:2] == resp.search_params['language']][0]
+ if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
# parse results
for result in dom.xpath(results_xpath):
@@ -56,17 +57,17 @@ def response(resp):
elif search_lang:
href = href + search_lang + '/'
- title = escape(link.xpath(".//text()")[0])
+ title = escape(extract_text(link))
- content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
+ content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
content = content + " - "
- text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
- content = content + html.tostring(text, method='text')
+ text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
+ content = content + text
if result.xpath(".//span") != []:
content = content +\
" - (" +\
- result.xpath(".//span//text()")[0].strip() +\
+ extract_text(result.xpath(".//span")) +\
")"
# append result
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index bd9a8c2fc..0e35e6188 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -13,8 +13,8 @@
from urlparse import urljoin
from urllib import urlencode
from lxml import html
-from cgi import escape
from datetime import datetime
+from searx.engines.xpath import extract_text
# engine dependent config
categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True
# search-url
base_url = 'https://twitter.com/'
-search_url = base_url+'search?'
+search_url = base_url + 'search?'
# specific xpath variables
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+title_xpath = './/span[@class="username js-action-profile-name"]'
content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
@@ -39,6 +39,8 @@ def request(query, params):
# set language if specified
if params['language'] != 'all':
params['cookies']['lang'] = params['language'].split('_')[0]
+ else:
+ params['cookies']['lang'] = 'en'
return params
@@ -53,8 +55,9 @@ def response(resp):
for tweet in dom.xpath(results_xpath):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
- title = ''.join(tweet.xpath(title_xpath))
- content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+ title = extract_text(tweet.xpath(title_xpath))
+ content = extract_text(tweet.xpath(content_xpath)[0])
+
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
timestamp = float(pubdate[0].attrib.get('data-time'))
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 17e2a7aab..3d26c9cc4 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -25,10 +25,10 @@ number_of_results = 5
# search-url
base_url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}'\
- '&startRecord={offset}'\
- '&maximumRecords={limit}'\
- '&contentdom={search_type}'\
- '&resource=global' # noqa
+ '&startRecord={offset}'\
+ '&maximumRecords={limit}'\
+ '&contentdom={search_type}'\
+ '&resource=global'
# yacy specific type-definitions
search_types = {'general': 'text',
@@ -41,7 +41,7 @@ search_types = {'general': 'text',
# do search-request
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
- search_type = search_types.get(params['category'], '0')
+ search_type = search_types.get(params.get('category'), '0')
params['url'] = base_url +\
search_url.format(query=urlencode({'query': query}),
@@ -66,9 +66,12 @@ def response(resp):
if not raw_search_results:
return []
- search_results = raw_search_results.get('channels', {})[0].get('items', [])
+ search_results = raw_search_results.get('channels', [])
- for result in search_results:
+ if len(search_results) == 0:
+ return []
+
+ for result in search_results[0].get('items', []):
# parse image results
if result.get('image'):
# append result
@@ -88,7 +91,7 @@ def response(resp):
'content': result['description'],
'publishedDate': publishedDate})
- #TODO parse video, audio and file results
+ # TODO parse video, audio and file results
# return results
return results
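
For context on the yacy.py hunk above: the old lookup raw_search_results.get('channels', {})[0] raises as soon as the reply has no 'channels' entry, because indexing the {} fallback with [0] raises KeyError. A small sketch of the guarded version, assuming a reply shaped like {"channels": [{"items": [...]}]}:

# Illustrative only -- behaviour of the old vs. new lookup on an empty yacy reply.
raw_search_results = {}                             # e.g. nothing useful came back
# old: raw_search_results.get('channels', {})[0]    # -> KeyError: 0 on the {} fallback
channels = raw_search_results.get('channels', [])   # new: fall back to an empty list
if len(channels) == 0:
    items = []                                      # and bail out early
else:
    items = channels[0].get('items', [])            # assumed shape shown above
print(items)                                        # []
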
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index c6c5b0d0d..161f7513b 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
def parse_url(url_string):
endings = ['/RS', '/RK']
endpositions = []
- start = url_string.find('http', url_string.find('/RU=')+1)
+ start = url_string.find('http', url_string.find('/RU=') + 1)
for ending in endings:
endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
'content': content})
# if no suggestion found, return results
- if not suggestion_xpath:
+ if not dom.xpath(suggestion_xpath):
return results
# parse suggestion
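
One remark on the final yahoo.py hunk: the old test "if not suggestion_xpath" checked the XPath string itself, which is always non-empty and therefore always truthy, so the early return for pages without suggestions never fired. The fix evaluates the expression against the parsed page instead. A minimal illustration:

# Illustrative only -- why the old guard never triggered.
suggestion_xpath = '//div[@id="satat"]//a'
print(not suggestion_xpath)               # False for any non-empty string, so no early return
# the fix asks the parsed page instead (dom being the lxml tree built from resp.text):
# if not dom.xpath(suggestion_xpath):     # empty match list -> no suggestions on the page
#     return results
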