author     Adam Tauber <asciimoo@gmail.com>   2015-02-12 10:52:55 +0100
committer  Adam Tauber <asciimoo@gmail.com>   2015-02-12 10:52:55 +0100
commit     f6db77d81ea87d99462b4c3cc40a8a27e0264724 (patch)
tree       b26fb71a62082aeec81c7bb1bb3d7447d006aed3 /searx/engines
parent     516105c570a920dadeb87b34ee5ee434ad5cb16f (diff)
parent     f96154b7c454a3b02bf688f248b4471c2020c28f (diff)
Merge pull request #210 from Cqoicebordel/unit-tests
unit tests
Diffstat (limited to 'searx/engines')
 searx/engines/currency_convert.py       |  9
 searx/engines/duckduckgo.py             | 10
 searx/engines/duckduckgo_definitions.py |  5
 searx/engines/faroo.py                  |  8
 searx/engines/openstreetmap.py          |  9
 searx/engines/photon.py                 |  2
 searx/engines/startpage.py              | 13
 searx/engines/subtitleseeker.py         | 15
 searx/engines/twitter.py                | 13
 searx/engines/yacy.py                   | 19
 searx/engines/yahoo.py                  |  4
 11 files changed, 54 insertions(+), 53 deletions(-)
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index d8841c1d1..4618c82b1 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -13,12 +13,9 @@ def request(query, params):
     if not m:
         # wrong query
         return params
-    try:
-        ammount, from_currency, to_currency = m.groups()
-        ammount = float(ammount)
-    except:
-        # wrong params
-        return params
+
+    ammount, from_currency, to_currency = m.groups()
+    ammount = float(ammount)
 
     q = (from_currency + to_currency).upper()
 
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 583e33f73..e35a6334c 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -15,7 +15,7 @@
 from urllib import urlencode
 from lxml.html import fromstring
-from searx.utils import html_to_text
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['general']
@@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}'
 # specific xpath variables
 result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
 url_xpath = './/a[@class="large"]/@href'
-title_xpath = './/a[@class="large"]//text()'
-content_xpath = './/div[@class="snippet"]//text()'
+title_xpath = './/a[@class="large"]'
+content_xpath = './/div[@class="snippet"]'
 
 
 # do search-request
@@ -64,8 +64,8 @@ def response(resp):
         if not res_url:
             continue
 
-        title = html_to_text(''.join(r.xpath(title_xpath)))
-        content = html_to_text(''.join(r.xpath(content_xpath)))
+        title = extract_text(r.xpath(title_xpath))
+        content = extract_text(r.xpath(content_xpath))
 
         # append result
         results.append({'title': title,
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index b66d6c0f2..793e97d22 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -25,9 +25,10 @@ def request(query, params):
 
 
 def response(resp):
-    search_res = json.loads(resp.text)
     results = []
 
+    search_res = json.loads(resp.text)
+
     content = ''
     heading = search_res.get('Heading', '')
     attributes = []
@@ -68,7 +69,7 @@ def response(resp):
         results.append({'title': heading, 'url': firstURL})
 
     # related topics
-    for ddg_result in search_res.get('RelatedTopics', None):
+    for ddg_result in search_res.get('RelatedTopics', []):
         if 'FirstURL' in ddg_result:
             suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                         ddg_result.get('Text', None),
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
index 5360ea156..4a5e60a60 100644
--- a/searx/engines/faroo.py
+++ b/searx/engines/faroo.py
@@ -37,7 +37,7 @@ search_category = {'general': 'web',
 
 # do search-request
 def request(query, params):
-    offset = (params['pageno']-1) * number_of_results + 1
+    offset = (params['pageno'] - 1) * number_of_results + 1
     categorie = search_category.get(params['category'], 'web')
 
     if params['language'] == 'all':
@@ -45,11 +45,11 @@ def request(query, params):
     else:
         language = params['language'].split('_')[0]
 
-    # skip, if language is not supported
+    # if language is not supported, put it in english
     if language != 'en' and\
        language != 'de' and\
        language != 'zh':
-        return params
+        language = 'en'
 
     params['url'] = search_url.format(offset=offset,
                                       number_of_results=number_of_results,
@@ -69,12 +69,10 @@ def response(resp):
 
     # HTTP-Code 401: api-key is not valide
     if resp.status_code == 401:
         raise Exception("API key is not valide")
-        return []
 
     # HTTP-Code 429: rate limit exceeded
     if resp.status_code == 429:
         raise Exception("rate limit has been exceeded!")
-        return []
 
     results = []
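A recurring change in this merge replaces ad-hoc text extraction such as ''.join(r.xpath('...//text()')) with searx.engines.xpath.extract_text, so engines keep plain element xpaths and leave text flattening to one shared helper. Below is a minimal sketch of the behaviour the updated engines rely on; extract_text_sketch is a hypothetical stand-in written for illustration, not the actual searx implementation:

    from lxml import html

    def extract_text_sketch(xpath_results):
        # accept a list of xpath results, a bare string result, or an element
        if isinstance(xpath_results, list):
            return ' '.join(extract_text_sketch(r) for r in xpath_results).strip()
        if isinstance(xpath_results, str):
            return xpath_results.strip()
        # an element: flatten all of its descendant text nodes
        return xpath_results.text_content().strip()

    dom = html.fromstring('<div><a class="large">a <b>bold</b> title</a></div>')
    print(extract_text_sketch(dom.xpath('.//a[@class="large"]')))  # a bold title

This is why xpaths such as title_xpath drop their trailing //text() in the hunks above: the helper now does that traversal itself.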
exceeded!") - return [] results = [] diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 68446ef5f..60c3c13ca 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -38,6 +38,9 @@ def response(resp): # parse results for r in json: + if 'display_name' not in r: + continue + title = r['display_name'] osm_type = r.get('osm_type', r.get('type')) url = result_base_url.format(osm_type=osm_type, @@ -49,10 +52,8 @@ def response(resp): geojson = r.get('geojson') # if no geojson is found and osm_type is a node, add geojson Point - if not geojson and\ - osm_type == 'node': - geojson = {u'type': u'Point', - u'coordinates': [r['lon'], r['lat']]} + if not geojson and osm_type == 'node': + geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} address_raw = r.get('address') address = {} diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 16340d24a..a9c558c4b 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -61,7 +61,7 @@ def response(resp): continue # get title - title = properties['name'] + title = properties.get('name') # get osm-type if properties.get('osm_type') == 'N': diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index d60ecd978..9d5b4befe 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -13,6 +13,7 @@ from lxml import html from cgi import escape import re +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -45,8 +46,7 @@ def request(query, params): # set language if specified if params['language'] != 'all': - params['data']['with_language'] = ('lang_' + - params['language'].split('_')[0]) + params['data']['with_language'] = ('lang_' + params['language'].split('_')[0]) return params @@ -64,18 +64,15 @@ def response(resp): continue link = links[0] url = link.attrib.get('href') - try: - title = escape(link.text_content()) - except UnicodeDecodeError: - continue # block google-ad url's if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): continue + title = escape(extract_text(link)) + if result.xpath('./p[@class="desc"]'): - content = escape(result.xpath('./p[@class="desc"]')[0] - .text_content()) + content = escape(extract_text(result.xpath('./p[@class="desc"]'))) else: content = '' diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 9aaf1947b..acefe30ea 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -12,6 +12,7 @@ from cgi import escape from urllib import quote_plus from lxml import html from searx.languages import language_codes +from searx.engines.xpath import extract_text # engine dependent config categories = ['videos'] @@ -20,7 +21,7 @@ language = "" # search-url url = 'http://www.subtitleseeker.com/' -search_url = url+'search/TITLES/{query}&p={pageno}' +search_url = url + 'search/TITLES/{query}&p={pageno}' # specific xpath variables results_xpath = '//div[@class="boxRows"]' @@ -44,7 +45,7 @@ def response(resp): if resp.search_params['language'] != 'all': search_lang = [lc[1] for lc in language_codes - if lc[0][:2] == resp.search_params['language']][0] + if lc[0][:2] == resp.search_params['language'].split('_')[0]][0] # parse results for result in dom.xpath(results_xpath): @@ -56,17 +57,17 @@ def response(resp): elif search_lang: href = href + search_lang + '/' - title = escape(link.xpath(".//text()")[0]) + title = escape(extract_text(link)) - content = result.xpath('.//div[contains(@class,"red")]//text()')[0] + 
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index bd9a8c2fc..0e35e6188 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -13,8 +13,8 @@
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
-from cgi import escape
 from datetime import datetime
+from searx.engines.xpath import extract_text
 
 # engine dependent config
 categories = ['social media']
@@ -22,12 +22,12 @@ language_support = True
 
 # search-url
 base_url = 'https://twitter.com/'
-search_url = base_url+'search?'
+search_url = base_url + 'search?'
 
 # specific xpath variables
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+title_xpath = './/span[@class="username js-action-profile-name"]'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]'
 timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
@@ -39,6 +39,8 @@ def request(query, params):
     # set language if specified
     if params['language'] != 'all':
         params['cookies']['lang'] = params['language'].split('_')[0]
+    else:
+        params['cookies']['lang'] = 'en'
 
     return params
 
@@ -53,8 +55,9 @@ def response(resp):
     for tweet in dom.xpath(results_xpath):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = ''.join(tweet.xpath(title_xpath))
-        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+        title = extract_text(tweet.xpath(title_xpath))
+        content = extract_text(tweet.xpath(content_xpath)[0])
+
         pubdate = tweet.xpath(timestamp_xpath)
         if len(pubdate) > 0:
             timestamp = float(pubdate[0].attrib.get('data-time'))
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 17e2a7aab..3d26c9cc4 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -25,10 +25,10 @@ number_of_results = 5
 # search-url
 base_url = 'http://localhost:8090'
 search_url = '/yacysearch.json?{query}'\
-    '&startRecord={offset}'\
-    '&maximumRecords={limit}'\
-    '&contentdom={search_type}'\
-    '&resource=global'  # noqa
+             '&startRecord={offset}'\
+             '&maximumRecords={limit}'\
+             '&contentdom={search_type}'\
+             '&resource=global'
 
 # yacy specific type-definitions
 search_types = {'general': 'text',
@@ -41,7 +41,7 @@ search_types = {'general': 'text',
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * number_of_results
-    search_type = search_types.get(params['category'], '0')
+    search_type = search_types.get(params.get('category'), '0')
 
     params['url'] = base_url +\
         search_url.format(query=urlencode({'query': query}),
@@ -66,9 +66,12 @@ def response(resp):
     if not raw_search_results:
         return []
 
-    search_results = raw_search_results.get('channels', {})[0].get('items', [])
+    search_results = raw_search_results.get('channels', [])
 
-    for result in search_results:
+    if len(search_results) == 0:
+        return []
+
+    for result in search_results[0].get('items', []):
         # parse image results
         if result.get('image'):
             # append result
@@ -88,7 +91,7 @@ def response(resp):
                             'content': result['description'],
                             'publishedDate': publishedDate})
 
-    #TODO parse video, audio and file results
+    # TODO parse video, audio and file results
 
     # return results
    return results
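The yacy.py response() rewrite above replaces raw_search_results.get('channels', {})[0], which blows up whenever the response carries no channels (KeyError when the {} default is indexed, IndexError when 'channels' is an empty list), with an explicit emptiness check. A sketch of the guard in isolation; the raw dict is a made-up stand-in for a yacy JSON response:

    raw = {'channels': []}  # a yacy response with no result channels

    search_results = raw.get('channels', [])

    if len(search_results) == 0:
        items = []  # the old one-liner would have raised IndexError here
    else:
        items = search_results[0].get('items', [])

    print(items)  # []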
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index c6c5b0d0d..161f7513b 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -35,7 +35,7 @@ suggestion_xpath = '//div[@id="satat"]//a'
 def parse_url(url_string):
     endings = ['/RS', '/RK']
     endpositions = []
-    start = url_string.find('http', url_string.find('/RU=')+1)
+    start = url_string.find('http', url_string.find('/RU=') + 1)
 
     for ending in endings:
         endpos = url_string.rfind(ending)
@@ -91,7 +91,7 @@ def response(resp):
                         'content': content})
 
     # if no suggestion found, return results
-    if not suggestion_xpath:
+    if not dom.xpath(suggestion_xpath):
         return results
 
     # parse suggestion
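The last yahoo.py hunk fixes a genuine logic bug: suggestion_xpath is a non-empty string, so "if not suggestion_xpath" was always False and the early return never fired. The fix evaluates the expression against the parsed page, where an empty result list is falsy. A self-contained illustration (the HTML snippet is made up):

    from lxml import html

    dom = html.fromstring('<div id="satat"></div>')  # page with no suggestion links
    suggestion_xpath = '//div[@id="satat"]//a'

    # old check: a non-empty string is always truthy, so this never triggered
    # if not suggestion_xpath:
    #     return results

    # new check: dom.xpath() returns a list; empty means no suggestions
    if not dom.xpath(suggestion_xpath):
        print('no suggestions found, returning results early')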