diff options
author | Adam Tauber <asciimoo@gmail.com> | 2016-11-30 18:43:03 +0100 |
---|---|---|
committer | Adam Tauber <asciimoo@gmail.com> | 2017-05-15 12:02:30 +0200 |
commit | 52e615dede8538c36f569d2cf07835427a9a0db6 (patch) | |
tree | ac65990c72156def2d49e81d981f0b3beda4fd2e | |
parent | 46a2c63f8e1c3819cceff2d61fe9106051e8ecee (diff) | |
download | searxng-52e615dede8538c36f569d2cf07835427a9a0db6.tar.gz searxng-52e615dede8538c36f569d2cf07835427a9a0db6.zip |
[enh] py3 compatibility
115 files changed, 517 insertions, 513 deletions
diff --git a/.travis.yml b/.travis.yml index 0a174ff66..b6017cd93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ addons: language: python python: - "2.7" + - "3.6" before_install: - "export DISPLAY=:99.0" - "sh -e /etc/init.d/xvfb start" @@ -24,9 +25,9 @@ script: - ./manage.sh styles - ./manage.sh grunt_build - ./manage.sh tests - - ./manage.sh py_test_coverage after_success: - coveralls + - ./manage.sh py_test_coverage + - coveralls notifications: irc: channels: diff --git a/requirements-dev.txt b/requirements-dev.txt index 01d1e1497..691a1e7ba 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,8 +3,7 @@ mock==2.0.0 nose2[coverage-plugin] pep8==1.7.0 plone.testing==5.0.0 -robotframework-selenium2library==1.8.0 -robotsuite==1.7.0 +splinter==0.7.5 transifex-client==0.12.2 unittest2==1.1.0 zope.testrunner==4.5.1 diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py index 8f5951c75..444316f11 100644 --- a/searx/answerers/__init__.py +++ b/searx/answerers/__init__.py @@ -1,8 +1,12 @@ from os import listdir from os.path import realpath, dirname, join, isdir +from sys import version_info from searx.utils import load_module from collections import defaultdict +if version_info[0] == 3: + unicode = str + answerers_dir = dirname(realpath(__file__)) @@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__)) def load_answerers(): answerers = [] for filename in listdir(answerers_dir): - if not isdir(join(answerers_dir, filename)): + if not isdir(join(answerers_dir, filename)) or filename.startswith('_'): continue module = load_module('answerer.py', join(answerers_dir, filename)) if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords): @@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers): def ask(query): results = [] - query_parts = filter(None, query.query.split()) + query_parts = list(filter(None, query.query.split())) - if query_parts[0] not in answerers_by_keywords: + if query_parts[0].decode('utf-8') not in answerers_by_keywords: return results - for answerer in answerers_by_keywords[query_parts[0]]: + for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]: result = answerer(query) if result: results.append(result) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index 510d9f5be..f2b8bf3e5 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -1,5 +1,6 @@ import random import string +import sys from flask_babel import gettext # required answerer attribute @@ -8,7 +9,11 @@ keywords = ('random',) random_int_max = 2**31 -random_string_letters = string.lowercase + string.digits + string.uppercase +if sys.version_info[0] == 2: + random_string_letters = string.lowercase + string.digits + string.uppercase +else: + unicode = str + random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_string(): @@ -24,9 +29,9 @@ def random_int(): return unicode(random.randint(-random_int_max, random_int_max)) -random_types = {u'string': random_string, - u'int': random_int, - u'float': random_float} +random_types = {b'string': random_string, + b'int': random_int, + b'float': random_float} # required answerer function diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index a04695f56..73dd25cfd 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -1,8 +1,12 @@ +from sys import version_info from functools import reduce from operator import mul from flask_babel import gettext +if version_info[0] == 3: + unicode = str + keywords = ('min', 'max', 'avg', @@ -19,22 +23,22 @@ def answer(query): return [] try: - args = map(float, parts[1:]) + args = list(map(float, parts[1:])) except: return [] func = parts[0] answer = None - if func == 'min': + if func == b'min': answer = min(args) - elif func == 'max': + elif func == b'max': answer = max(args) - elif func == 'avg': + elif func == b'avg': answer = sum(args) / len(args) - elif func == 'sum': + elif func == b'sum': answer = sum(args) - elif func == 'prod': + elif func == b'prod': answer = reduce(mul, args, 1) if answer is None: diff --git a/searx/autocomplete.py b/searx/autocomplete.py index b360af9f6..de0623a8a 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. from lxml import etree from json import loads -from urllib import urlencode from searx import settings from searx.languages import language_codes from searx.engines import ( @@ -26,6 +25,11 @@ from searx.engines import ( ) from searx.poolrequests import get as http_get +try: + from urllib import urlencode +except: + from urllib.parse import urlencode + def get(*args, **kwargs): if 'timeout' not in kwargs: diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index c6bc3cb6d..0de04bd95 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,8 +1,7 @@ -from urllib import quote from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from urlparse import urljoin +from searx.url_utils import quote, urljoin url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 77184a282..023ec409a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -72,12 +72,11 @@ def load_engine(engine_data): if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map( - str.strip, engine_data['categories'].split(',')) + engine.categories = list(map(str.strip, engine_data['categories'].split(','))) continue setattr(engine, param_name, engine_data[param_name]) - for arg_name, arg_value in engine_default_args.iteritems(): + for arg_name, arg_value in engine_default_args.items(): if not hasattr(engine, arg_name): setattr(engine, arg_name, arg_value) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index dca825790..cad06f8c6 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -11,10 +11,9 @@ @parse url, title """ -from urlparse import urljoin -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/base.py b/searx/engines/base.py index a552453ce..ff006a3bc 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -14,10 +14,10 @@ """ from lxml import etree -from urllib import urlencode -from searx.utils import searx_useragent from datetime import datetime import re +from searx.url_utils import urlencode +from searx.utils import searx_useragent categories = ['science'] @@ -73,7 +73,7 @@ def request(query, params): def response(resp): results = [] - search_results = etree.XML(resp.content) + search_results = etree.XML(resp.text) for entry in search_results.xpath('./result/doc'): content = "No description available" diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 4e7ead82d..052d567ea 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -13,9 +13,9 @@ @todo publishedDate """ -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 97f6dca37..e79740e50 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -15,11 +15,11 @@ limited response to 10 images """ -from urllib import urlencode from lxml import html from json import loads import re from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 765bcd38e..8e3cc517e 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -11,13 +11,12 @@ @parse url, title, content, publishedDate, thumbnail """ -from urllib import urlencode -from urlparse import urlparse, parse_qsl from datetime import datetime from dateutil import parser from lxml import etree from searx.utils import list_get from searx.engines.bing import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode, urlparse, parse_qsl # engine dependent config categories = ['news'] @@ -86,7 +85,7 @@ def request(query, params): def response(resp): results = [] - rss = etree.fromstring(resp.content) + rss = etree.fromstring(resp.text) ns = rss.nsmap diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py index c0664f390..f71645634 100644 --- a/searx/engines/blekko_images.py +++ b/searx/engines/blekko_images.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 33c8355de..40438673f 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -10,11 +10,10 @@ @parse url, title, content, seed, leech, magnetlink """ -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.url_utils import quote, urljoin from searx.utils import get_torrent_size # engine dependent config @@ -38,7 +37,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) search_res = dom.xpath('//div[@id="search_res"]/table/tr') diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index bc839cfb5..1218d4849 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,21 +1,25 @@ -from datetime import datetime +import json import re import os -import json +import sys import unicodedata +from datetime import datetime + +if sys.version_info[0] == 3: + unicode = str categories = [] url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa +parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) db = 1 def normalize_name(name): - name = name.lower().replace('-', ' ').rstrip('s') + name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() @@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language): def request(query, params): - m = parser_re.match(unicode(query, 'utf8')) + m = parser_re.match(query) if not m: # wrong query return params diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 8c69aafe0..fad7e596c 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -12,10 +12,9 @@ @todo set content-parameter with correct data """ -from urllib import urlencode from json import loads from datetime import datetime -from requests import get +from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 3db1af3d2..af63478fb 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true" def request(query, params): offset = (params['pageno'] - 1) * 25 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) return params diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index a24b75b8a..bb85c6dc5 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -12,10 +12,10 @@ @todo rewrite to api """ -from urllib import urlencode from lxml import html import re from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 20a9a8980..7c3478629 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -10,20 +10,20 @@ """ import re -from urlparse import urljoin from lxml import html from searx.utils import is_valid_lang +from searx.url_utils import urljoin categories = ['general'] url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 -parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) +parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) results_xpath = './/table[@id="r"]/tr' def request(query, params): - m = parser_re.match(unicode(query, 'utf8')) + m = parser_re.match(query) if not m: return params diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index b55d7747a..ff2f94593 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -10,10 +10,14 @@ @parse url, title, content, magnetlink """ -from urlparse import urljoin +from sys import version_info from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size +from searx.url_utils import urljoin + +if version_info[0] == 3: + unicode = str categories = ['videos', 'music', 'files'] paging = True @@ -31,7 +35,7 @@ def request(query, params): def response(resp): - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) search_res = dom.xpath('.//td[@class="x-item"]') if not search_res: diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 238b466a0..606747a4d 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -10,10 +10,10 @@ @parse url, title, content, publishedDate, thumbnail """ -from urllib import quote_plus +from dateutil import parser from json import loads from lxml import html -from dateutil import parser +from searx.url_utils import quote_plus # engine dependent config categories = ['news', 'social media'] diff --git a/searx/engines/doku.py b/searx/engines/doku.py index 93867fd0d..a391be444 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -9,9 +9,9 @@ # @stable yes # @parse (general) url, title, content -from urllib import urlencode from lxml.html import fromstring from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 1ae484123..1872ab7d4 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -13,11 +13,11 @@ @todo rewrite to api """ -from urllib import urlencode from lxml.html import fromstring from requests import get from json import loads from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index dd3f12e1e..21c6a6578 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -1,10 +1,10 @@ import json -from urllib import urlencode -from re import compile, sub from lxml import html -from searx.utils import html_to_text +from re import compile from searx.engines.xpath import extract_text from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode +from searx.utils import html_to_text url = 'https://api.duckduckgo.com/'\ + '?{query}&format=json&pretty=0&no_redirect=1&d=1' diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py index 9fa244e77..e24d1b7dc 100644 --- a/searx/engines/faroo.py +++ b/searx/engines/faroo.py @@ -10,10 +10,10 @@ @parse url, title, content, publishedDate, img_src """ -from urllib import urlencode from json import loads import datetime from searx.utils import searx_useragent +from searx.url_utils import urlencode # engine dependent config categories = ['general', 'news'] diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 6d470a4eb..a6b01a8ee 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -9,9 +9,9 @@ @parse url, title, content """ -from urllib import urlencode -from searx.engines.xpath import extract_text from lxml import html +from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['files'] @@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}' # do search-request def request(query, params): - query = urlencode({'fdfilter': query, - 'fdpage': params['pageno']}) + query = urlencode({'fdfilter': query, 'fdpage': params['pageno']}) params['url'] = search_url.format(query=query) return params diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py index 71665bd4e..ed57a6bf3 100644 --- a/searx/engines/filecrop.py +++ b/searx/engines/filecrop.py @@ -1,5 +1,9 @@ -from urllib import urlencode -from HTMLParser import HTMLParser +from searx.url_utils import urlencode + +try: + from HTMLParser import HTMLParser +except: + from html.parser import HTMLParser url = 'http://www.filecrop.com/' search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa @@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser): def request(query, params): index = 1 + (params['pageno'] - 1) * 30 - params['url'] = search_url.format(query=urlencode({'w': query}), - index=index) + params['url'] = search_url.format(query=urlencode({'w': query}), index=index) return params diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 5ce1160e9..de1769370 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -13,8 +13,8 @@ More info on api-key : https://www.flickr.com/services/apps/create/ """ -from urllib import urlencode from json import loads +from searx.url_utils import urlencode categories = ['images'] diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 3c0ec7b70..08f07f7ce 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -12,11 +12,11 @@ @parse url, title, thumbnail, img_src """ -from urllib import urlencode from json import loads from time import time import re from searx.engines import logger +from searx.url_utils import urlencode logger = logger.getChild('flickr-noapi') diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index e8d1d8aa7..f2eecdc73 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -10,12 +10,10 @@ @parse url, title, content, thumbnail, img_src """ -from urlparse import urljoin from cgi import escape -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text -from dateutil import parser +from searx.url_utils import urljoin, urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index a9383f862..a67b42dbe 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,7 +10,7 @@ Frinkiac (Images) """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode categories = ['images'] diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 0c1d7f613..37933c69b 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -11,10 +11,9 @@ """ from json import loads -from random import randint from time import time -from urllib import urlencode from lxml.html import fromstring +from searx.url_utils import urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/github.py b/searx/engines/github.py index 7adef3be9..eaa00da4f 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -10,8 +10,8 @@ @parse url, title, content """ -from urllib import urlencode from json import loads +from searx.url_utils import urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/google.py b/searx/engines/google.py index e14e9e702..934f5c29a 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -9,11 +9,10 @@ # @parse url, title, content, suggestion import re -from urllib import urlencode -from urlparse import urlparse, parse_qsl from lxml import html, etree from searx.engines.xpath import extract_text, extract_url -from searx.search import logger +from searx import logger +from searx.url_utils import urlencode, urlparse, parse_qsl logger = logger.getChild('google engine') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 9a3c71c7e..9692f4b82 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -11,9 +11,9 @@ """ from datetime import date, timedelta -from urllib import urlencode from json import loads from lxml import html +from searx.url_utils import urlencode # engine dependent config diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index 6b79ff5c8..7344b5289 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -11,9 +11,8 @@ """ from lxml import html -from urllib import urlencode -from json import loads from searx.engines.google import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode # search-url categories = ['news'] diff --git a/searx/engines/ina.py b/searx/engines/ina.py index 86a39782b..37a05f099 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -12,11 +12,15 @@ # @todo embedded (needs some md5 from video page) from json import loads -from urllib import urlencode from lxml import html -from HTMLParser import HTMLParser -from searx.engines.xpath import extract_text from dateutil import parser +from searx.engines.xpath import extract_text +from searx.url_utils import urlencode + +try: + from HTMLParser import HTMLParser +except: + from html.parser import HTMLParser # engine dependent config categories = ['videos'] diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 4604c3cac..67d6a5a65 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,11 +1,16 @@ -from urllib import urlencode -from json import loads from collections import Iterable +from json import loads +from sys import version_info +from searx.url_utils import urlencode + +if version_info[0] == 3: + unicode = str search_url = None url_query = None content_query = None title_query = None +paging = False suggestion_query = '' results_query = '' @@ -20,7 +25,7 @@ first_page_num = 1 def iterate(iterable): if type(iterable) == dict: - it = iterable.iteritems() + it = iterable.items() else: it = enumerate(iterable) diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 059fa2a66..5e897c96f 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -10,12 +10,11 @@ @parse url, title, content, seed, leech, magnetlink """ -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text from searx.utils import get_torrent_size, convert_str_to_int +from searx.url_utils import quote, urljoin # engine dependent config categories = ['videos', 'music', 'files'] diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 93d98d3aa..5a70204b1 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -14,7 +14,7 @@ from json import loads from string import Formatter -from urllib import urlencode, quote +from searx.url_utils import urlencode, quote # engine dependent config categories = ['general'] diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 312d297eb..470c007ea 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -11,8 +11,8 @@ """ from json import loads -from urllib import urlencode from dateutil import parser +from searx.url_utils import urlencode # engine dependent config categories = ['music'] diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index 4ca5b3171..272c712c4 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -9,9 +9,9 @@ @parse url, title, content, seed, leech, torrentfile """ -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['files', 'images', 'videos', 'music'] diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 01ca7d42d..733ba6203 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -11,7 +11,6 @@ """ from json import loads -from searx.utils import searx_useragent # engine dependent config categories = ['map'] @@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}' def request(query, params): params['url'] = base_url + search_string.format(query=query) - # using searx User-Agent - params['headers']['User-Agent'] = searx_useragent() - return params diff --git a/searx/engines/photon.py b/searx/engines/photon.py index a029bbfef..15236f680 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -10,9 +10,9 @@ @parse url, title """ -from urllib import urlencode from json import loads from searx.utils import searx_useragent +from searx.url_utils import urlencode # engine dependent config categories = ['map'] diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index ca21a3bb2..a5af8d824 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -8,11 +8,10 @@ # @stable yes (HTML can change) # @parse url, title, content, seed, leech, magnetlink -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter from searx.engines.xpath import extract_text +from searx.url_utils import quote, urljoin # engine dependent config categories = ['videos', 'music', 'files'] diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 1fc4630fa..cb097eb38 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -12,9 +12,8 @@ from datetime import datetime from json import loads -from urllib import urlencode - from searx.utils import html_to_text +from searx.url_utils import urlencode # engine dependent config categories = None diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index b29792a3a..d19724906 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -11,9 +11,8 @@ """ import json -from urllib import urlencode -from urlparse import urlparse, urljoin from datetime import datetime +from searx.url_utils import urlencode, urljoin, urlparse # engine dependent config categories = ['general', 'images', 'news', 'social media'] @@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}' # do search-request def request(query, params): - query = urlencode({'q': query, - 'limit': page_size}) + query = urlencode({'q': query, 'limit': page_size}) params['url'] = search_url.format(query=query) return params diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index ad78155ac..72fd2b3c9 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -10,9 +10,7 @@ @parse url, title, content, img_src """ -from urllib import urlencode from json import loads, dumps -from dateutil import parser from searx.utils import html_to_text # engine dependent config @@ -48,7 +46,7 @@ def response(resp): search_res = loads(resp.text) # return empty array if there are no results - if search_res.get('total') < 1: + if search_res.get('total', 0) < 1: return [] # parse results diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index be7a6d385..789e8e7a9 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -10,8 +10,8 @@ @parse url, title, content """ -from urllib import urlencode from json import loads +from searx.url_utils import urlencode # engine dependent config @@ -31,8 +31,7 @@ code_endings = {'cs': 'c#', # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno'] - 1) + params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1) return params diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 99e10be62..4b8e9a84a 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -10,8 +10,8 @@ @parse url, title, content """ -from urllib import urlencode from json import loads +from searx.url_utils import urlencode # engine dependent config categories = ['it'] @@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}' # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno'] - 1) + params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1) return params diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py index e1309a9b5..3770dacac 100644 --- a/searx/engines/seedpeer.py +++ b/searx/engines/seedpeer.py @@ -8,11 +8,9 @@ # @stable yes (HTML can change) # @parse url, title, content, seed, leech, magnetlink -from urlparse import urljoin -from urllib import quote from lxml import html from operator import itemgetter -from searx.engines.xpath import extract_text +from searx.url_utils import quote, urljoin url = 'http://www.seedpeer.eu/' diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 62b03ac03..41b40da61 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -11,13 +11,17 @@ """ import re -from StringIO import StringIO from json import loads -from lxml import etree -from urllib import urlencode, quote_plus +from lxml import html from dateutil import parser from searx import logger from searx.poolrequests import get as http_get +from searx.url_utils import quote_plus, urlencode + +try: + from cStringIO import StringIO +except: + from io import StringIO # engine dependent config categories = ['music'] @@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\ 'scrolling="no" frameborder="no" ' +\ 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' +cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) + def get_client_id(): response = http_get("https://soundcloud.com") - rx_namespace = {"re": "http://exslt.org/regular-expressions"} if response.ok: - tree = etree.parse(StringIO(response.content), etree.HTMLParser()) - script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) + tree = html.fromstring(response.content) + script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] # extracts valid app_js urls from soundcloud.com content @@ -51,7 +56,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) + cids = cid_re.search(response.text) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 249ba91ef..aed756be3 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -11,7 +11,7 @@ """ from json import loads -from urllib import urlencode +from searx.url_utils import urlencode # engine dependent config categories = ['music'] @@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{ def request(query, params): offset = (params['pageno'] - 1) * 20 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) return params diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 5e7ab2901..25875aa15 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -10,10 +10,9 @@ @parse url, title, content """ -from urlparse import urljoin -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] @@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]' # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - pageno=params['pageno']) + params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno']) return params diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 54aafdee5..314b7b9a8 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -56,7 +56,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) # parse results for result in dom.xpath(results_xpath): diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py index 77b010c3f..2cbc991b3 100644 --- a/searx/engines/subtitleseeker.py +++ b/searx/engines/subtitleseeker.py @@ -10,10 +10,10 @@ @parse url, title, content """ -from urllib import quote_plus from lxml import html from searx.languages import language_codes from searx.engines.xpath import extract_text +from searx.url_utils import quote_plus # engine dependent config categories = ['videos'] diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py index dd398857f..e9c13ca24 100644 --- a/searx/engines/swisscows.py +++ b/searx/engines/swisscows.py @@ -11,9 +11,9 @@ """ from json import loads -from urllib import urlencode, unquote import re from lxml.html import fromstring +from searx.url_utils import unquote, urlencode # engine dependent config categories = ['general', 'images'] @@ -27,10 +27,10 @@ search_string = '?{query}&page={page}' supported_languages_url = base_url # regex -regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment') -regex_json_remove_start = re.compile(r'^initialData:\s*') -regex_json_remove_end = re.compile(r',\s*environment$') -regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=') +regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment') +regex_json_remove_start = re.compile(b'^initialData:\s*') +regex_json_remove_end = re.compile(b',\s*environment$') +regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=') # do search-request @@ -45,10 +45,9 @@ def request(query, params): ui_language = params['language'].split('-')[0] search_path = search_string.format( - query=urlencode({'query': query, - 'uiLanguage': ui_language, - 'region': region}), - page=params['pageno']) + query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}), + page=params['pageno'] + ) # image search query is something like 'image?{query}&page={page}' if params['category'] == 'images': @@ -63,14 +62,14 @@ def request(query, params): def response(resp): results = [] - json_regex = regex_json.search(resp.content) + json_regex = regex_json.search(resp.text) # check if results are returned if not json_regex: return [] - json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group())) - json = loads(json_raw) + json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group())) + json = loads(json_raw.decode('utf-8')) # parse results for result in json['Results'].get('items', []): @@ -78,7 +77,7 @@ def response(resp): # parse image results if result.get('ContentType', '').startswith('image'): - img_url = unquote(regex_img_url_remove_start.sub('', result['Url'])) + img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8')) # append result results.append({'url': result['SourceUrl'], @@ -100,7 +99,7 @@ def response(resp): # parse images for result in json.get('Images', []): # decode image url - img_url = unquote(regex_img_url_remove_start.sub('', result['Url'])) + img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8')) # append result results.append({'url': result['SourceUrl'], diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 52b2cbe07..9a6b5e57d 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -11,11 +11,11 @@ """ import re -from urllib import urlencode from lxml import html from searx.engines.xpath import extract_text from datetime import datetime from searx.engines.nyaa import int_or_zero, get_filesize_mul +from searx.url_utils import urlencode # engine dependent config categories = ['files', 'videos', 'music'] @@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}' # do search-request def request(query, params): - query = urlencode({'page': params['pageno'], - 'terms': query}) + query = urlencode({'page': params['pageno'], 'terms': query}) params['url'] = search_url.format(query=query) return params @@ -50,7 +49,7 @@ def response(resp): size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) # processing the results, two rows at a time - for i in xrange(0, len(rows), 2): + for i in range(0, len(rows), 2): # parse the first row name_row = rows[i] @@ -79,14 +78,14 @@ def response(resp): groups = size_re.match(item).groups() multiplier = get_filesize_mul(groups[1]) params['filesize'] = int(multiplier * float(groups[0])) - except Exception as e: + except: pass elif item.startswith('Date:'): try: # Date: 2016-02-21 21:44 UTC date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC') params['publishedDate'] = date - except Exception as e: + except: pass elif item.startswith('Comment:'): params['content'] = item diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py index f9c832651..dda56fc22 100644 --- a/searx/engines/torrentz.py +++ b/searx/engines/torrentz.py @@ -12,11 +12,11 @@ """ import re -from urllib import urlencode from lxml import html -from searx.engines.xpath import extract_text from datetime import datetime from searx.engines.nyaa import int_or_zero, get_filesize_mul +from searx.engines.xpath import extract_text +from searx.url_utils import urlencode # engine dependent config categories = ['files', 'videos', 'music'] @@ -70,7 +70,7 @@ def response(resp): size_str = result.xpath('./dd/span[@class="s"]/text()')[0] size, suffix = size_str.split() params['filesize'] = int(size) * get_filesize_mul(suffix) - except Exception as e: + except: pass # does our link contain a valid SHA1 sum? @@ -84,7 +84,7 @@ def response(resp): # Fri, 25 Mar 2016 16:29:01 date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S') params['publishedDate'] = date - except Exception as e: + except: pass results.append(params) diff --git a/searx/engines/translated.py b/searx/engines/translated.py index e78db0d8e..5c7b17033 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -9,8 +9,12 @@ @parse url, title, content """ import re +from sys import version_info from searx.utils import is_valid_lang +if version_info[0] == 3: + unicode = str + categories = ['general'] url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index 6cca05f70..038cef47f 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -12,11 +12,10 @@ @todo publishedDate """ -from urlparse import urljoin -from urllib import urlencode from lxml import html from datetime import datetime from searx.engines.xpath import extract_text +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['social media'] diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 5d5310544..1408be8df 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -13,8 +13,8 @@ # @todo set content-parameter with correct data from json import loads -from urllib import urlencode from dateutil import parser +from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 3f849bc7d..be217463c 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,12 +14,11 @@ from searx import logger from searx.poolrequests import get from searx.engines.xpath import extract_text -from searx.utils import format_date_by_locale from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url +from searx.url_utils import urlencode from json import loads from lxml.html import fromstring -from urllib import urlencode logger = logger.getChild('wikidata') result_count = 1 @@ -62,14 +61,13 @@ def request(query, params): language = 'en' params['url'] = url_search.format( - query=urlencode({'label': query, - 'language': language})) + query=urlencode({'label': query, 'language': language})) return params def response(resp): results = [] - html = fromstring(resp.content) + html = fromstring(resp.text) wikidata_ids = html.xpath(wikidata_ids_xpath) language = resp.search_params['language'].split('-')[0] @@ -78,10 +76,9 @@ def response(resp): # TODO: make requests asynchronous to avoid timeout when result_count > 1 for wikidata_id in wikidata_ids[:result_count]: - url = url_detail.format(query=urlencode({'page': wikidata_id, - 'uselang': language})) + url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language})) htmlresponse = get(url) - jsonresponse = loads(htmlresponse.content) + jsonresponse = loads(htmlresponse.text) results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language']) return results diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 3af8f1c71..db2fdc000 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -11,13 +11,12 @@ """ from json import loads -from urllib import urlencode, quote from lxml.html import fromstring - +from searx.url_utils import quote, urlencode # search-url -base_url = 'https://{language}.wikipedia.org/' -search_postfix = 'w/api.php?'\ +base_url = u'https://{language}.wikipedia.org/' +search_url = base_url + u'w/api.php?'\ 'action=query'\ '&format=json'\ '&{query}'\ @@ -37,16 +36,16 @@ def url_lang(lang): else: language = lang - return base_url.format(language=language) + return language # do search-request def request(query, params): if query.islower(): - query += '|' + query.title() + query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8') - params['url'] = url_lang(params['language']) \ - + search_postfix.format(query=urlencode({'titles': query})) + params['url'] = search_url.format(query=urlencode({'titles': query}), + language=url_lang(params['language'])) return params @@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image): def response(resp): results = [] - search_result = loads(resp.content) + search_result = loads(resp.text) # wikipedia article's unique id # first valid id is assumed to be the requested article @@ -99,11 +98,9 @@ def response(resp): extract = page.get('extract') summary = extract_first_paragraph(extract, title, image) - if not summary: - return [] # link to wikipedia article - wikipedia_link = url_lang(resp.search_params['language']) \ + wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \ + 'wiki/' + quote(title.replace(' ', '_').encode('utf8')) results.append({'url': wikipedia_link, 'title': title}) diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index e743c8f56..595c6b7de 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -8,8 +8,8 @@ # @stable yes # @parse url, infobox -from urllib import urlencode from lxml import etree +from searx.url_utils import urlencode # search-url search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' @@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation', # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'input': query}), - api_key=api_key) + params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key) params['headers']['Referer'] = site_url.format(query=urlencode({'i': query})) return params @@ -56,7 +55,7 @@ def replace_pua_chars(text): u'\uf74e': 'i', # imaginary number u'\uf7d9': '='} # equals sign - for k, v in pua_chars.iteritems(): + for k, v in pua_chars.items(): text = text.replace(k, v) return text @@ -66,7 +65,7 @@ def replace_pua_chars(text): def response(resp): results = [] - search_results = etree.XML(resp.content) + search_results = etree.XML(resp.text) # return empty array if there are no results if search_results.xpath(failure_xpath): @@ -120,10 +119,10 @@ def response(resp): # append infobox results.append({'infobox': infobox_title, 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) # append link to site - results.append({'url': resp.request.headers['Referer'].decode('utf8'), + results.append({'url': resp.request.headers['Referer'], 'title': title, 'content': result_content}) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 1534501b3..2a8642f92 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -10,10 +10,9 @@ from json import loads from time import time -from urllib import urlencode -from lxml.etree import XML from searx.poolrequests import get as http_get +from searx.url_utils import urlencode # search-url url = 'https://www.wolframalpha.com/' @@ -62,7 +61,7 @@ obtain_token() # do search-request def request(query, params): # obtain token if last update was more than an hour - if time() - token['last_updated'] > 3600: + if time() - (token['last_updated'] or 0) > 3600: obtain_token() params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value']) params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query})) @@ -112,9 +111,9 @@ def response(resp): results.append({'infobox': infobox_title, 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) - results.append({'url': resp.request.headers['Referer'].decode('utf8'), + results.append({'url': resp.request.headers['Referer'], 'title': 'Wolfram|Alpha (' + infobox_title + ')', 'content': result_content}) diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 1269a5422..508803240 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -10,11 +10,9 @@ @parse url, title, thumbnail, img_src, content """ -from urllib import urlencode -from urlparse import urljoin from lxml import html -import string import re +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['images'] @@ -55,7 +53,7 @@ def response(resp): cur_element += result_part # fix xml-error - cur_element = string.replace(cur_element, '"></a>', '"/></a>') + cur_element = cur_element.replace('"></a>', '"/></a>') dom = html.fromstring(cur_element) link = dom.xpath('//a')[0] diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py index 546521ba3..7a2015ae9 100644 --- a/searx/engines/www500px.py +++ b/searx/engines/www500px.py @@ -13,8 +13,7 @@ """ from json import loads -from urllib import urlencode -from urlparse import urljoin +from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['images'] diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 0d39b28a8..f466697bd 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,13 +1,13 @@ from lxml import html -from urllib import urlencode, unquote -from urlparse import urlparse, urljoin from lxml.etree import _ElementStringResult, _ElementUnicodeResult from searx.utils import html_to_text +from searx.url_utils import unquote, urlencode, urljoin, urlparse search_url = None url_xpath = None content_xpath = None title_xpath = None +paging = False suggestion_xpath = '' results_xpath = '' diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 7b1b6b35d..a62a1296e 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -13,8 +13,8 @@ # @todo parse video, audio and file results from json import loads -from urllib import urlencode from dateutil import parser +from searx.url_utils import urlencode from searx.utils import html_to_text diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 5c62c2ed8..5387aaf54 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -11,10 +11,9 @@ @parse url, title, content, suggestion """ -from urllib import urlencode -from urlparse import unquote from lxml import html from searx.engines.xpath import extract_text, extract_url +from searx.url_utils import unquote, urlencode # engine dependent config categories = ['general'] diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 1a0fd28f5..ae54a4acd 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -9,13 +9,13 @@ # @stable no (HTML can change) # @parse url, title, content, publishedDate -from urllib import urlencode +import re +from datetime import datetime, timedelta from lxml import html from searx.engines.xpath import extract_text, extract_url from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url -from datetime import datetime, timedelta -import re from dateutil import parser +from searx.url_utils import urlencode # engine dependent config categories = ['news'] diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index 65aee28b8..1c789f6cb 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -9,9 +9,9 @@ @parse url, title, content """ -from urllib import urlencode from lxml import html -from searx.search import logger +from searx import logger +from searx.url_utils import urlencode logger = logger.getChild('yandex engine') diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index 1dfca5166..6de18aa2c 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -9,8 +9,8 @@ # @parse url, title, content, publishedDate, thumbnail, embedded from json import loads -from urllib import urlencode from dateutil import parser +from searx.url_utils import urlencode # engine dependent config categories = ['videos', 'music'] diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 9b7ca64c8..9f01841f6 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -8,10 +8,10 @@ # @stable no # @parse url, title, content, publishedDate, thumbnail, embedded -from urllib import quote_plus from lxml import html from searx.engines.xpath import extract_text from searx.utils import list_get +from searx.url_utils import quote_plus # engine dependent config categories = ['videos', 'music'] diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 011d36260..46c1f8918 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, <asciimoo@gmail.com> ''' -from sys import exit +from sys import exit, version_info from searx import logger +if version_info[0] == 3: + unicode = str + logger = logger.getChild('plugins') from searx.plugins import (doai_rewrite, diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py index a6e15ae5a..95efa8f9b 100644 --- a/searx/plugins/doai_rewrite.py +++ b/searx/plugins/doai_rewrite.py @@ -1,6 +1,6 @@ from flask_babel import gettext import re -from urlparse import urlparse, parse_qsl +from searx.url_utils import urlparse, parse_qsl regex = re.compile(r'10\.\d{4,9}/[^\s]+') diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py index 8b4c9784e..4462c86bc 100644 --- a/searx/plugins/https_rewrite.py +++ b/searx/plugins/https_rewrite.py @@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' import re -from urlparse import urlparse +import sys from lxml import etree from os import listdir, environ from os.path import isfile, isdir, join from searx.plugins import logger from flask_babel import gettext from searx import searx_dir +from searx.url_utils import urlparse +if sys.version_info[0] == 3: + unicode = str name = "HTTPS rewrite" description = gettext('Rewrite HTTP links to HTTPS if possible') diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index a2aeda98e..8d6c661ad 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -22,7 +22,7 @@ default_on = True # Self User Agent regex -p = re.compile('.*user[ -]agent.*', re.IGNORECASE) +p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) # attach callback to the post search hook @@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE) def post_search(request, search): if search.search_query.pageno > 1: return True - if search.search_query.query == 'ip': + if search.search_query.query == b'ip': x_forwarded_for = request.headers.getlist("X-Forwarded-For") if x_forwarded_for: ip = x_forwarded_for[0] diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 68a004e33..a84012828 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. from flask_babel import gettext import re -from urlparse import urlunparse +from searx.url_utils import urlunparse regexes = {re.compile(r'utm_[^&]+&?'), re.compile(r'(wkey|wemail)[^&]+&?'), diff --git a/searx/preferences.py b/searx/preferences.py index 43d9ec0dd..b6a2ec4cc 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -23,7 +23,7 @@ class Setting(object): def __init__(self, default_value, **kwargs): super(Setting, self).__init__() self.value = default_value - for key, value in kwargs.iteritems(): + for key, value in kwargs.items(): setattr(self, key, value) self._post_init() @@ -38,7 +38,7 @@ class Setting(object): return self.value def save(self, name, resp): - resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE) + resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE) class StringSetting(Setting): @@ -133,7 +133,7 @@ class MapSetting(Setting): def save(self, name, resp): if hasattr(self, 'key'): - resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE) + resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE) class SwitchableSetting(Setting): @@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting): def _post_init(self): super(EnginesSetting, self)._post_init() transformed_choices = [] - for engine_name, engine in self.choices.iteritems(): + for engine_name, engine in self.choices.items(): for category in engine.categories: transformed_choice = dict() transformed_choice['default_on'] = not engine.disabled @@ -241,9 +241,9 @@ class Preferences(object): 'language': SearchLanguageSetting(settings['search']['language'], choices=LANGUAGE_CODES), 'locale': EnumStringSetting(settings['ui']['default_locale'], - choices=settings['locales'].keys() + ['']), + choices=list(settings['locales'].keys()) + ['']), 'autocomplete': EnumStringSetting(settings['search']['autocomplete'], - choices=autocomplete.backends.keys() + ['']), + choices=list(autocomplete.backends.keys()) + ['']), 'image_proxy': MapSetting(settings['server']['image_proxy'], map={'': settings['server']['image_proxy'], '0': False, @@ -260,7 +260,7 @@ class Preferences(object): self.unknown_params = {} def parse_cookies(self, input_data): - for user_setting_name, user_setting in input_data.iteritems(): + for user_setting_name, user_setting in input_data.items(): if user_setting_name in self.key_value_settings: self.key_value_settings[user_setting_name].parse(user_setting) elif user_setting_name == 'disabled_engines': @@ -274,7 +274,7 @@ class Preferences(object): disabled_engines = [] enabled_categories = [] disabled_plugins = [] - for user_setting_name, user_setting in input_data.iteritems(): + for user_setting_name, user_setting in input_data.items(): if user_setting_name in self.key_value_settings: self.key_value_settings[user_setting_name].parse(user_setting) elif user_setting_name.startswith('engine_'): @@ -295,7 +295,7 @@ class Preferences(object): return self.key_value_settings[user_setting_name].get_value() def save(self, resp): - for user_setting_name, user_setting in self.key_value_settings.iteritems(): + for user_setting_name, user_setting in self.key_value_settings.items(): user_setting.save(user_setting_name, resp) self.engines.save(resp) self.plugins.save(resp) diff --git a/searx/query.py b/searx/query.py index b8b1c0d2f..828a6fb30 100644 --- a/searx/query.py +++ b/searx/query.py @@ -21,8 +21,12 @@ from searx.languages import language_codes from searx.engines import ( categories, engines, engine_shortcuts ) -import string import re +import string +import sys + +if sys.version_info[0] == 3: + unicode = str VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') @@ -146,7 +150,7 @@ class SearchQuery(object): """container for all the search parameters (query, language, etc...)""" def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range): - self.query = query + self.query = query.encode('utf-8') self.engines = engines self.categories = categories self.lang = lang diff --git a/searx/results.py b/searx/results.py index e262ec110..b6d408e29 100644 --- a/searx/results.py +++ b/searx/results.py @@ -1,9 +1,13 @@ import re +import sys from collections import defaultdict from operator import itemgetter from threading import RLock -from urlparse import urlparse, unquote from searx.engines import engines +from searx.url_utils import urlparse, unquote + +if sys.version_info[0] == 3: + basestring = str CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) diff --git a/searx/search.py b/searx/search.py index 980cfeb99..790e7d071 100644 --- a/searx/search.py +++ b/searx/search.py @@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' import gc +import sys import threading -from thread import start_new_thread from time import time from uuid import uuid4 import requests.exceptions @@ -33,6 +33,14 @@ from searx import logger from searx.plugins import plugins from searx.exceptions import SearxParameterException +try: + from thread import start_new_thread +except: + from _thread import start_new_thread + +if sys.version_info[0] == 3: + unicode = str + logger = logger.getChild('search') number_of_searches = 0 @@ -387,7 +395,7 @@ class Search(object): request_params['time_range'] = search_query.time_range # append request to list - requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params)) + requests.append((selected_engine['name'], search_query.query, request_params)) # update timeout_limit timeout_limit = max(timeout_limit, engine.timeout) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index dbaf2fd52..59320480c 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -17,7 +17,7 @@ server: ui: themes_path : "" - default_theme : legacy + default_theme : oscar default_locale : "" outgoing: diff --git a/searx/templates/courgette/404.html b/searx/templates/courgette/404.html index 77f1287ab..9e3b8ac29 100644 --- a/searx/templates/courgette/404.html +++ b/searx/templates/courgette/404.html @@ -3,7 +3,7 @@ <div class="center"> <h1>{{ _('Page not found') }}</h1> {% autoescape false %} - <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> + <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p> {% endautoescape %} </div> {% endblock %} diff --git a/searx/templates/legacy/404.html b/searx/templates/legacy/404.html index 05c14e155..3e889dd21 100644 --- a/searx/templates/legacy/404.html +++ b/searx/templates/legacy/404.html @@ -3,7 +3,7 @@ <div class="center"> <h1>{{ _('Page not found') }}</h1> {% autoescape false %} - <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> + <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p> {% endautoescape %} </div> {% endblock %} diff --git a/searx/templates/oscar/404.html b/searx/templates/oscar/404.html index 11d789564..5a50880a9 100644 --- a/searx/templates/oscar/404.html +++ b/searx/templates/oscar/404.html @@ -3,7 +3,7 @@ <div class="text-center"> <h1>{{ _('Page not found') }}</h1> {% autoescape false %} - <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> + <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p> {% endautoescape %} </div> {% endblock %} diff --git a/searx/templates/pix-art/404.html b/searx/templates/pix-art/404.html index 592e8610f..389bb5ec1 100644 --- a/searx/templates/pix-art/404.html +++ b/searx/templates/pix-art/404.html @@ -3,7 +3,7 @@ <div class="center"> <h1>{{ _('Page not found') }}</h1> {% autoescape false %} - <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p> + <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p> {% endautoescape %} </div> {% endblock %} diff --git a/searx/testing.py b/searx/testing.py index 312e9f295..0d17b2a08 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- """Shared testing code.""" -from plone.testing import Layer -from unittest2 import TestCase -from os.path import dirname, join, abspath - import os import subprocess +import traceback + + +from os.path import dirname, join, abspath + +from splinter import Browser +from unittest2 import TestCase class SearxTestLayer: @@ -32,7 +35,7 @@ class SearxTestLayer: testTearDown = classmethod(testTearDown) -class SearxRobotLayer(Layer): +class SearxRobotLayer(): """Searx Robot Test Layer""" def setUp(self): @@ -62,7 +65,12 @@ class SearxRobotLayer(Layer): del os.environ['SEARX_SETTINGS_PATH'] -SEARXROBOTLAYER = SearxRobotLayer() +# SEARXROBOTLAYER = SearxRobotLayer() +def run_robot_tests(tests): + print('Running {0} tests'.format(len(tests))) + for test in tests: + with Browser() as browser: + test(browser) class SearxTestCase(TestCase): @@ -72,17 +80,19 @@ class SearxTestCase(TestCase): if __name__ == '__main__': - from tests.test_robot import test_suite import sys - from zope.testrunner.runner import Runner + # test cases + from tests import robot base_dir = abspath(join(dirname(__file__), '../tests')) if sys.argv[1] == 'robot': - r = Runner(['--color', - '--auto-progress', - '--stop-on-error', - '--path', - base_dir], - found_suites=[test_suite()]) - r.run() - sys.exit(int(r.failed)) + test_layer = SearxRobotLayer() + errors = False + try: + test_layer.setUp() + run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')]) + except Exception: + errors = True + print('Error occured: {0}'.format(traceback.format_exc())) + test_layer.tearDown() + sys.exit(1 if errors else 0) diff --git a/searx/url_utils.py b/searx/url_utils.py new file mode 100644 index 000000000..e9919ab30 --- /dev/null +++ b/searx/url_utils.py @@ -0,0 +1,28 @@ +from sys import version_info + +if version_info[0] == 2: + from urllib import quote, quote_plus, unquote, urlencode + from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult +else: + from urllib.parse import ( + parse_qsl, + quote, + quote_plus, + unquote, + urlencode, + urljoin, + urlparse, + urlunparse, + ParseResult + ) + + +__export__ = (parse_qsl, + quote, + quote_plus, + unquote, + urlencode, + urljoin, + urlparse, + urlunparse, + ParseResult) diff --git a/searx/utils.py b/searx/utils.py index 35cb6f8a6..f24c57afa 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -1,11 +1,9 @@ -import cStringIO import csv import os import re from babel.dates import format_date from codecs import getincrementalencoder -from HTMLParser import HTMLParser from imp import load_source from os.path import splitext, join from random import choice @@ -16,6 +14,19 @@ from searx.languages import language_codes from searx import settings from searx import logger +try: + from cStringIO import StringIO +except: + from io import StringIO + +try: + from HTMLParser import HTMLParser +except: + from html.parser import HTMLParser + +if sys.version_info[0] == 3: + unichr = chr + unicode = str logger = logger.getChild('utils') @@ -140,7 +151,7 @@ class UnicodeWriter: def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): # Redirect output to a queue - self.queue = cStringIO.StringIO() + self.queue = StringIO() self.writer = csv.writer(self.queue, dialect=dialect, **kwds) self.stream = f self.encoder = getincrementalencoder(encoding)() @@ -152,14 +163,13 @@ class UnicodeWriter: unicode_row.append(col.encode('utf-8').strip()) else: unicode_row.append(col) - self.writer.writerow(unicode_row) + self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row]) # Fetch UTF-8 output from the queue ... - data = self.queue.getvalue() - data = data.decode("utf-8") + data = self.queue.getvalue().strip('\x00') # ... and reencode it into the target encoding data = self.encoder.encode(data) # write to the target stream - self.stream.write(data) + self.stream.write(data.decode('utf-8')) # empty queue self.queue.truncate(0) @@ -231,7 +241,7 @@ def dict_subset(d, properties): def prettify_url(url, max_length=74): if len(url) > max_length: - chunk_len = max_length / 2 + 1 + chunk_len = int(max_length / 2 + 1) return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:]) else: return url diff --git a/searx/webapp.py b/searx/webapp.py index 2aba4556d..03b572955 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -22,11 +22,12 @@ if __name__ == '__main__': from os.path import realpath, dirname path.append(realpath(dirname(realpath(__file__)) + '/../')) -import cStringIO import hashlib import hmac import json import os +import sys + import requests from searx import logger @@ -42,8 +43,6 @@ except: exit(1) from cgi import escape from datetime import datetime, timedelta -from urllib import urlencode -from urlparse import urlparse, urljoin from werkzeug.contrib.fixers import ProxyFix from flask import ( Flask, request, render_template, url_for, Response, make_response, @@ -52,7 +51,7 @@ from flask import ( from flask_babel import Babel, gettext, format_date, format_decimal from flask.json import jsonify from searx import settings, searx_dir, searx_debug -from searx.exceptions import SearxException, SearxParameterException +from searx.exceptions import SearxParameterException from searx.engines import ( categories, engines, engine_shortcuts, get_engines_stats, initialize_engines ) @@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends from searx.plugins import plugins from searx.preferences import Preferences, ValidationException from searx.answerers import answerers +from searx.url_utils import urlencode, urlparse, urljoin # check if the pyopenssl package is installed. # It is needed for SSL connection without trouble, see #298 @@ -78,6 +78,15 @@ except ImportError: logger.critical("The pyopenssl package has to be installed.\n" "Some HTTPS connections will fail") +try: + from cStringIO import StringIO +except: + from io import StringIO + + +if sys.version_info[0] == 3: + unicode = str + # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) @@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs): kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') + kwargs['unicode'] = unicode + kwargs['scripts'] = set() for plugin in request.user_plugins: for script in plugin.js_dependencies: @@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs): def pre_request(): request.errors = [] - preferences = Preferences(themes, categories.keys(), engines, plugins) + preferences = Preferences(themes, list(categories.keys()), engines, plugins) request.preferences = preferences try: preferences.parse_cookies(request.cookies) @@ -479,10 +490,8 @@ def index(): for result in results: if output_format == 'html': if 'content' in result and result['content']: - result['content'] = highlight_content(escape(result['content'][:1024]), - search_query.query.encode('utf-8')) - result['title'] = highlight_content(escape(result['title'] or u''), - search_query.query.encode('utf-8')) + result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) + result['title'] = highlight_content(escape(result['title'] or u''), search_query.query) else: if result.get('content'): result['content'] = html_to_text(result['content']).strip() @@ -510,7 +519,7 @@ def index(): result['publishedDate'] = format_date(result['publishedDate']) if output_format == 'json': - return Response(json.dumps({'query': search_query.query, + return Response(json.dumps({'query': search_query.query.decode('utf-8'), 'number_of_results': number_of_results, 'results': results, 'answers': list(result_container.answers), @@ -519,7 +528,7 @@ def index(): 'suggestions': list(result_container.suggestions)}), mimetype='application/json') elif output_format == 'csv': - csv = UnicodeWriter(cStringIO.StringIO()) + csv = UnicodeWriter(StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') csv.writerow(keys) for row in results: @@ -527,7 +536,7 @@ def index(): csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') - cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8')) + cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) response.headers.add('Content-Disposition', cont_disp) return response elif output_format == 'rss': @@ -578,7 +587,7 @@ def autocompleter(): disabled_engines = request.preferences.engines.get_disabled() # parse query - raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines) + raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines) raw_text_query.parse_query() # check if search query is set @@ -820,6 +829,7 @@ def page_not_found(e): def run(): + logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address']) app.run( debug=searx_debug, use_debugger=searx_debug, diff --git a/tests/robot/__init__.py b/tests/robot/__init__.py index e69de29bb..038a3196f 100644 --- a/tests/robot/__init__.py +++ b/tests/robot/__init__.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +from time import sleep + +url = "http://localhost:11111/" + + +def test_index(browser): + # Visit URL + browser.visit(url) + assert browser.is_text_present('about') + + +def test_404(browser): + # Visit URL + browser.visit(url + 'missing_link') + assert browser.is_text_present('Page not found') + + +def test_about(browser): + browser.visit(url) + browser.click_link_by_text('about') + assert browser.is_text_present('Why use searx?') + + +def test_preferences(browser): + browser.visit(url) + browser.click_link_by_text('preferences') + assert browser.is_text_present('Preferences') + assert browser.is_text_present('Cookies') + + assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]') + + +def test_preferences_engine_select(browser): + browser.visit(url) + browser.click_link_by_text('preferences') + + assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]') + browser.find_by_xpath('//a[@href="#tab_engine"]').first.click() + + assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked + browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check() + browser.find_by_xpath('//input[@value="save"]').first.click() + + # waiting for the redirect - without this the test is flaky.. + sleep(1) + + browser.visit(url) + browser.click_link_by_text('preferences') + browser.find_by_xpath('//a[@href="#tab_engine"]').first.click() + + assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked + + +def test_preferences_locale(browser): + browser.visit(url) + browser.click_link_by_text('preferences') + + browser.select('locale', 'hu') + browser.find_by_xpath('//input[@value="save"]').first.click() + + # waiting for the redirect - without this the test is flaky.. + sleep(1) + + browser.visit(url) + browser.click_link_by_text('beállítások') + browser.is_text_present('Beállítások') + + +def test_search(browser): + browser.visit(url) + browser.fill('q', 'test search query') + browser.find_by_xpath('//button[@type="submit"]').first.click() + assert browser.is_text_present('didn\'t find any results') diff --git a/tests/robot/test_basic.robot b/tests/robot/test_basic.robot deleted file mode 100644 index d0074cd00..000000000 --- a/tests/robot/test_basic.robot +++ /dev/null @@ -1,153 +0,0 @@ -*** Settings *** -Library Selenium2Library timeout=10 implicit_wait=0.5 -Test Setup Open Browser http://localhost:11111/ -Test Teardown Close All Browsers - - -*** Keywords *** -Submit Preferences - Set Selenium Speed 2 seconds - Submit Form id=search_form - Location Should Be http://localhost:11111/ - Set Selenium Speed 0 seconds - - -*** Test Cases *** -Front page - Page Should Contain about - Page Should Contain preferences - -404 page - Go To http://localhost:11111/no-such-page - Page Should Contain Page not found - Page Should Contain Go to search page - -About page - Click Element link=about - Page Should Contain Why use searx? - Page Should Contain Element link=search engines - -Preferences page - Click Element link=preferences - Page Should Contain Preferences - Page Should Contain Default categories - Page Should Contain Currently used search engines - Page Should Contain dummy dummy - Page Should Contain general dummy - -Switch category - Go To http://localhost:11111/preferences - Page Should Contain Checkbox category_general - Page Should Contain Checkbox category_dummy - Click Element xpath=//*[.="general"] - Click Element xpath=//*[.="dummy"] - Submit Preferences - Checkbox Should Not Be Selected category_general - Checkbox Should Be Selected category_dummy - -Change language - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - Select From List locale hu - Submit Preferences - Page Should Contain rólunk - Page Should Contain beállítások - -Change method - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - Select From List method GET - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be method GET - Select From List method POST - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be method POST - -Change theme - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be theme legacy - Select From List theme oscar - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be theme oscar - -Change safesearch - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be safesearch None - Select From List safesearch Strict - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be safesearch Strict - -Change image proxy - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be image_proxy Disabled - Select From List image_proxy Enabled - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be image_proxy Enabled - -Change search language - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be language Default language - Select From List language Türkçe - tr-TR - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be language Türkçe - tr-TR - -Change autocomplete - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be autocomplete - - Select From List autocomplete google - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be autocomplete google - -Change allowed/disabled engines - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - Page Should Contain Engine name - Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block - Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block - Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy'] - Submit Preferences - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - Page Should Contain Engine name - Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block - Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \ - -Block a plugin - Page Should Contain about - Page Should Contain preferences - Go To http://localhost:11111/preferences - List Selection Should Be theme legacy - Select From List theme oscar - Submit Preferences - Go To http://localhost:11111/preferences - List Selection Should Be theme oscar - Page Should Contain Plugins - Click Link Plugins - Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite - Click Element xpath=//label[@for='plugin_HTTPS_rewrite'] - Submit Preferences - Go To http://localhost:11111/preferences - Page Should Contain Plugins - Click Link Plugins - Checkbox Should Be Selected id=plugin_HTTPS_rewrite diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py index d0009d63a..e4ee0339c 100644 --- a/tests/unit/engines/test_archlinux.py +++ b/tests/unit/engines/test_archlinux.py @@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase): self.assertTrue(query in params['url']) self.assertTrue('wiki.archlinux.org' in params['url']) - for lang, domain in domains.iteritems(): + for lang, domain in domains.items(): dic['language'] = lang params = archlinux.request(query, dic) self.assertTrue(domain in params['url']) @@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase): for exp in expected: res = results[i] i += 1 - for key, value in exp.iteritems(): + for key, value in exp.items(): self.assertEqual(res[key], value) diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py index a63b2e333..523ec57b8 100644 --- a/tests/unit/engines/test_bing.py +++ b/tests/unit/engines/test_bing.py @@ -7,18 +7,18 @@ from searx.testing import SearxTestCase class TestBingEngine(SearxTestCase): def test_request(self): - query = 'test_query' + query = u'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 dicto['language'] = 'fr_FR' - params = bing.request(query, dicto) + params = bing.request(query.encode('utf-8'), dicto) self.assertTrue('url' in params) self.assertTrue(query in params['url']) self.assertTrue('language%3AFR' in params['url']) self.assertTrue('bing.com' in params['url']) dicto['language'] = 'all' - params = bing.request(query, dicto) + params = bing.request(query.encode('utf-8'), dicto) self.assertTrue('language' in params['url']) def test_response(self): diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py index b6793f7be..e571adcee 100644 --- a/tests/unit/engines/test_bing_news.py +++ b/tests/unit/engines/test_bing_news.py @@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase): self.assertRaises(AttributeError, bing_news.response, '') self.assertRaises(AttributeError, bing_news.response, '[]') - response = mock.Mock(content='<html></html>') + response = mock.Mock(text='<html></html>') self.assertEqual(bing_news.response(response), []) - response = mock.Mock(content='<html></html>') + response = mock.Mock(text='<html></html>') self.assertEqual(bing_news.response(response), []) html = """<?xml version="1.0" encoding="utf-8" ?> @@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase): </item> </channel> </rss>""" # noqa - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase): </item> </channel> </rss>""" # noqa - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase): </channel> </rss>""" # noqa - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = bing_news.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) html = """<?xml version="1.0" encoding="utf-8" ?>gabarge""" - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response) diff --git a/tests/unit/engines/test_btdigg.py b/tests/unit/engines/test_btdigg.py index 2721f4e7c..6a88e3f75 100644 --- a/tests/unit/engines/test_btdigg.py +++ b/tests/unit/engines/test_btdigg.py @@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase): self.assertRaises(AttributeError, btdigg.response, '') self.assertRaises(AttributeError, btdigg.response, '[]') - response = mock.Mock(content='<html></html>') + response = mock.Mock(text='<html></html>') self.assertEqual(btdigg.response(response), []) - html = """ + html = u""" <div id="search_res"> <table> <tr> @@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase): </table> </div> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase): </table> </div> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) - html = """ + html = u""" <div id="search_res"> <table> <tr> @@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase): </table> </div> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = btdigg.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 5) diff --git a/tests/unit/engines/test_currency_convert.py b/tests/unit/engines/test_currency_convert.py index b7720569f..2814d791d 100644 --- a/tests/unit/engines/test_currency_convert.py +++ b/tests/unit/engines/test_currency_convert.py @@ -8,13 +8,13 @@ from searx.testing import SearxTestCase class TestCurrencyConvertEngine(SearxTestCase): def test_request(self): - query = 'test_query' + query = b'test_query' dicto = defaultdict(dict) dicto['pageno'] = 1 params = currency_convert.request(query, dicto) self.assertNotIn('url', params) - query = 'convert 10 Pound Sterlings to United States Dollars' + query = b'convert 10 Pound Sterlings to United States Dollars' params = currency_convert.request(query, dicto) self.assertIn('url', params) self.assertIn('finance.yahoo.com', params['url']) diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py index 31a1b03a4..31c2ecabb 100644 --- a/tests/unit/engines/test_digbt.py +++ b/tests/unit/engines/test_digbt.py @@ -21,7 +21,7 @@ class TestDigBTEngine(SearxTestCase): self.assertRaises(AttributeError, digbt.response, '') self.assertRaises(AttributeError, digbt.response, '[]') - response = mock.Mock(content='<html></html>') + response = mock.Mock(text='<html></html>') self.assertEqual(digbt.response(response), []) html = """ @@ -50,7 +50,7 @@ class TestDigBTEngine(SearxTestCase): </td></tr> </table> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = digbt.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py index 7d6abad22..8502a2e51 100644 --- a/tests/unit/engines/test_duckduckgo.py +++ b/tests/unit/engines/test_duckduckgo.py @@ -90,8 +90,7 @@ class TestDuckduckgoEngine(SearxTestCase): "wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)" }some more code...""" response = mock.Mock(text=js) - languages = duckduckgo._fetch_supported_languages(response) - self.assertEqual(type(languages), list) + languages = list(duckduckgo._fetch_supported_languages(response)) self.assertEqual(len(languages), 5) self.assertIn('wt-WT', languages) self.assertIn('es-AR', languages) diff --git a/tests/unit/engines/test_frinkiac.py b/tests/unit/engines/test_frinkiac.py index f3eb021d2..5ea220cd3 100644 --- a/tests/unit/engines/test_frinkiac.py +++ b/tests/unit/engines/test_frinkiac.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from collections import defaultdict import mock -from json import dumps from searx.engines import frinkiac from searx.testing import SearxTestCase @@ -44,6 +43,8 @@ class TestFrinkiacEngine(SearxTestCase): self.assertEqual(type(results), list) self.assertEqual(len(results), 4) self.assertEqual(results[0]['title'], u'S06E18') - self.assertEqual(results[0]['url'], 'https://frinkiac.com/?p=caption&e=S06E18&t=534616') + self.assertIn('p=caption', results[0]['url']) + self.assertIn('e=S06E18', results[0]['url']) + self.assertIn('t=534616', results[0]['url']) self.assertEqual(results[0]['thumbnail_src'], 'https://frinkiac.com/img/S06E18/534616/medium.jpg') self.assertEqual(results[0]['img_src'], 'https://frinkiac.com/img/S06E18/534616.jpg') diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py index 127b974b5..6b2d26458 100644 --- a/tests/unit/engines/test_gigablast.py +++ b/tests/unit/engines/test_gigablast.py @@ -10,6 +10,7 @@ class TestGigablastEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['pageno'] = 0 + dicto['safesearch'] = 0 dicto['language'] = 'all' params = gigablast.request(query, dicto) self.assertTrue('url' in params) diff --git a/tests/unit/engines/test_soundcloud.py b/tests/unit/engines/test_soundcloud.py index 85495dc57..3077d3b4b 100644 --- a/tests/unit/engines/test_soundcloud.py +++ b/tests/unit/engines/test_soundcloud.py @@ -2,7 +2,7 @@ from collections import defaultdict import mock from searx.engines import soundcloud from searx.testing import SearxTestCase -from urllib import quote_plus +from searx.url_utils import quote_plus class TestSoundcloudEngine(SearxTestCase): diff --git a/tests/unit/engines/test_startpage.py b/tests/unit/engines/test_startpage.py index 9a1a09bc7..a7a97785e 100644 --- a/tests/unit/engines/test_startpage.py +++ b/tests/unit/engines/test_startpage.py @@ -31,7 +31,7 @@ class TestStartpageEngine(SearxTestCase): self.assertRaises(AttributeError, startpage.response, '') self.assertRaises(AttributeError, startpage.response, '[]') - response = mock.Mock(content='<html></html>') + response = mock.Mock(text='<html></html>') self.assertEqual(startpage.response(response), []) html = """ @@ -62,7 +62,7 @@ class TestStartpageEngine(SearxTestCase): </p> </div> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = startpage.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -133,7 +133,7 @@ class TestStartpageEngine(SearxTestCase): </p> </div> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html.encode('utf-8')) results = startpage.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py index 27f33d70a..53890be78 100644 --- a/tests/unit/engines/test_swisscows.py +++ b/tests/unit/engines/test_swisscows.py @@ -33,13 +33,13 @@ class TestSwisscowsEngine(SearxTestCase): self.assertRaises(AttributeError, swisscows.response, '') self.assertRaises(AttributeError, swisscows.response, '[]') - response = mock.Mock(content='<html></html>') + response = mock.Mock(text=b'<html></html>') self.assertEqual(swisscows.response(response), []) - response = mock.Mock(content='<html></html>') + response = mock.Mock(text=b'<html></html>') self.assertEqual(swisscows.response(response), []) - html = u""" + html = b""" <script> App.Dispatcher.dispatch("initialize", { html5history: true, @@ -111,7 +111,7 @@ class TestSwisscowsEngine(SearxTestCase): }); </script> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = swisscows.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 3) diff --git a/tests/unit/engines/test_tokyotoshokan.py b/tests/unit/engines/test_tokyotoshokan.py index efe7dbfc2..b5c6fad17 100644 --- a/tests/unit/engines/test_tokyotoshokan.py +++ b/tests/unit/engines/test_tokyotoshokan.py @@ -91,7 +91,7 @@ class TestTokyotoshokanEngine(SearxTestCase): self.assertEqual(r['title'], 'Koyomimonogatari') self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b') self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5)) - self.assertEqual(r['publishedDate'], datetime(2016, 03, 26, 16, 41)) + self.assertEqual(r['publishedDate'], datetime(2016, 3, 26, 16, 41)) self.assertEqual(r['content'], 'Comment: sample comment') self.assertEqual(r['seed'], 53) self.assertEqual(r['leech'], 18) diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py index ec5f52ef9..aa69f116e 100644 --- a/tests/unit/engines/test_wikidata.py +++ b/tests/unit/engines/test_wikidata.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from json import loads from lxml.html import fromstring from collections import defaultdict import mock @@ -31,7 +30,7 @@ class TestWikidataEngine(SearxTestCase): self.assertRaises(AttributeError, wikidata.response, '') self.assertRaises(AttributeError, wikidata.response, '[]') - response = mock.Mock(content='<html></html>', search_params={"language": "all"}) + response = mock.Mock(text='<html></html>', search_params={"language": "all"}) self.assertEqual(wikidata.response(response), []) def test_getDetail(self): diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py index 988080b6a..7a86514c7 100644 --- a/tests/unit/engines/test_wikipedia.py +++ b/tests/unit/engines/test_wikipedia.py @@ -13,15 +13,15 @@ class TestWikipediaEngine(SearxTestCase): query = 'test_query' dicto = defaultdict(dict) dicto['language'] = 'fr-FR' - params = wikipedia.request(query, dicto) + params = wikipedia.request(query.encode('utf-8'), dicto) self.assertIn('url', params) self.assertIn(query, params['url']) self.assertIn('test_query', params['url']) self.assertIn('Test_Query', params['url']) self.assertIn('fr.wikipedia.org', params['url']) - query = 'Test_Query' - params = wikipedia.request(query, dicto) + query = u'Test_Query' + params = wikipedia.request(query.encode('utf-8'), dicto) self.assertIn('Test_Query', params['url']) self.assertNotIn('test_query', params['url']) @@ -57,7 +57,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) self.assertEqual(wikipedia.response(response), []) # normal case @@ -80,7 +80,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -108,10 +108,10 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) - self.assertEqual(len(results), 0) + self.assertEqual(len(results), 2) # no image json = """ @@ -130,7 +130,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -158,7 +158,7 @@ class TestWikipediaEngine(SearxTestCase): } } }""" - response = mock.Mock(content=json, search_params=dicto) + response = mock.Mock(text=json, search_params=dicto) results = wikipedia.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py index 64a64ceb3..30d337645 100644 --- a/tests/unit/engines/test_wolframalpha_api.py +++ b/tests/unit/engines/test_wolframalpha_api.py @@ -35,11 +35,11 @@ class TestWolframAlphaAPIEngine(SearxTestCase): xml = '''<?xml version='1.0' encoding='UTF-8'?> <queryresult success='false' error='false' /> ''' - response = mock.Mock(content=xml) + response = mock.Mock(text=xml.encode('utf-8')) self.assertEqual(wolframalpha_api.response(response), []) # test basic case - xml = """<?xml version='1.0' encoding='UTF-8'?> + xml = b"""<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' error='false' numpods='3' @@ -83,7 +83,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): </pod> </queryresult> """ - response = mock.Mock(content=xml, request=request) + response = mock.Mock(text=xml, request=request) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) @@ -107,7 +107,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): self.assertIn('result_plaintext', results[1]['content']) # test calc - xml = """<?xml version='1.0' encoding='UTF-8'?> + xml = b"""<?xml version='1.0' encoding='UTF-8'?> <queryresult success='true' error='false' numpods='2' @@ -144,7 +144,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase): </pod> </queryresult> """ - response = mock.Mock(content=xml, request=request) + response = mock.Mock(text=xml, request=request) results = wolframalpha_api.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 2) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 78dcea478..e497371f8 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -48,11 +48,11 @@ class SelfIPTest(SearxTestCase): # IP test request = Mock(remote_addr='127.0.0.1') request.headers.getlist.return_value = [] - search = get_search_mock(query='ip', pageno=1) + search = get_search_mock(query=b'ip', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('127.0.0.1' in search.result_container.answers) - search = get_search_mock(query='ip', pageno=2) + search = get_search_mock(query=b'ip', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('127.0.0.1' in search.result_container.answers) @@ -60,26 +60,26 @@ class SelfIPTest(SearxTestCase): request = Mock(user_agent='Mock') request.headers.getlist.return_value = [] - search = get_search_mock(query='user-agent', pageno=1) + search = get_search_mock(query=b'user-agent', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers) - search = get_search_mock(query='user-agent', pageno=2) + search = get_search_mock(query=b'user-agent', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('Mock' in search.result_container.answers) - search = get_search_mock(query='user-agent', pageno=1) + search = get_search_mock(query=b'user-agent', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers) - search = get_search_mock(query='user-agent', pageno=2) + search = get_search_mock(query=b'user-agent', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('Mock' in search.result_container.answers) - search = get_search_mock(query='What is my User-Agent?', pageno=1) + search = get_search_mock(query=b'What is my User-Agent?', pageno=1) store.call(store.plugins, 'post_search', request, search) self.assertTrue('Mock' in search.result_container.answers) - search = get_search_mock(query='What is my User-Agent?', pageno=2) + search = get_search_mock(query=b'What is my User-Agent?', pageno=2) store.call(store.plugins, 'post_search', request, search) self.assertFalse('Mock' in search.result_container.answers) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 04480791d..eb40e62e2 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,8 +1,12 @@ # -*- coding: utf-8 -*- import mock +import sys from searx.testing import SearxTestCase from searx import utils +if sys.version_info[0] == 3: + unicode = str + class TestUtils(SearxTestCase): @@ -30,9 +34,9 @@ class TestUtils(SearxTestCase): self.assertEqual(utils.highlight_content(content, None), content) content = 'a' - query = 'test' + query = b'test' self.assertEqual(utils.highlight_content(content, query), content) - query = 'a test' + query = b'a test' self.assertEqual(utils.highlight_content(content, query), content) def test_html_to_text(self): diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py index 5e5f0b4bf..45a08c1ba 100644 --- a/tests/unit/test_webapp.py +++ b/tests/unit/test_webapp.py @@ -2,10 +2,10 @@ import json from mock import Mock -from urlparse import ParseResult from searx import webapp from searx.testing import SearxTestCase from searx.search import Search +from searx.url_utils import ParseResult class ViewsTestCase(SearxTestCase): @@ -57,37 +57,35 @@ class ViewsTestCase(SearxTestCase): def test_index_empty(self): result = self.app.post('/') self.assertEqual(result.status_code, 200) - self.assertIn('<div class="title"><h1>searx</h1></div>', result.data) + self.assertIn(b'<div class="title"><h1>searx</h1></div>', result.data) def test_index_html(self): result = self.app.post('/', data={'q': 'test'}) self.assertIn( - '<h3 class="result_title"><img width="14" height="14" class="favicon" src="/static/themes/legacy/img/icons/icon_youtube.ico" alt="youtube" /><a href="http://second.test.xyz" rel="noreferrer">Second <span class="highlight">Test</span></a></h3>', # noqa + b'<h3 class="result_title"><img width="14" height="14" class="favicon" src="/static/themes/legacy/img/icons/icon_youtube.ico" alt="youtube" /><a href="http://second.test.xyz" rel="noreferrer">Second <span class="highlight">Test</span></a></h3>', # noqa result.data ) self.assertIn( - '<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>', # noqa + b'<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>', # noqa result.data ) def test_index_json(self): result = self.app.post('/', data={'q': 'test', 'format': 'json'}) - result_dict = json.loads(result.data) + result_dict = json.loads(result.data.decode('utf-8')) self.assertEqual('test', result_dict['query']) - self.assertEqual( - result_dict['results'][0]['content'], 'first test content') - self.assertEqual( - result_dict['results'][0]['url'], 'http://first.test.xyz') + self.assertEqual(result_dict['results'][0]['content'], 'first test content') + self.assertEqual(result_dict['results'][0]['url'], 'http://first.test.xyz') def test_index_csv(self): result = self.app.post('/', data={'q': 'test', 'format': 'csv'}) self.assertEqual( - 'title,url,content,host,engine,score\r\n' - 'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,\r\n' # noqa - 'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,\r\n', # noqa + b'title,url,content,host,engine,score\r\n' + b'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,\r\n' # noqa + b'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,\r\n', # noqa result.data ) @@ -95,65 +93,65 @@ class ViewsTestCase(SearxTestCase): result = self.app.post('/', data={'q': 'test', 'format': 'rss'}) self.assertIn( - '<description>Search results for "test" - searx</description>', + b'<description>Search results for "test" - searx</description>', result.data ) self.assertIn( - '<opensearch:totalResults>3</opensearch:totalResults>', + b'<opensearch:totalResults>3</opensearch:totalResults>', result.data ) self.assertIn( - '<title>First Test</title>', + b'<title>First Test</title>', result.data ) self.assertIn( - '<link>http://first.test.xyz</link>', + b'<link>http://first.test.xyz</link>', result.data ) self.assertIn( - '<description>first test content</description>', + b'<description>first test content</description>', result.data ) def test_about(self): result = self.app.get('/about') self.assertEqual(result.status_code, 200) - self.assertIn('<h1>About <a href="/">searx</a></h1>', result.data) + self.assertIn(b'<h1>About <a href="/">searx</a></h1>', result.data) def test_preferences(self): result = self.app.get('/preferences') self.assertEqual(result.status_code, 200) self.assertIn( - '<form method="post" action="/preferences" id="search_form">', + b'<form method="post" action="/preferences" id="search_form">', result.data ) self.assertIn( - '<legend>Default categories</legend>', + b'<legend>Default categories</legend>', result.data ) self.assertIn( - '<legend>Interface language</legend>', + b'<legend>Interface language</legend>', result.data ) def test_stats(self): result = self.app.get('/stats') self.assertEqual(result.status_code, 200) - self.assertIn('<h2>Engine stats</h2>', result.data) + self.assertIn(b'<h2>Engine stats</h2>', result.data) def test_robots_txt(self): result = self.app.get('/robots.txt') self.assertEqual(result.status_code, 200) - self.assertIn('Allow: /', result.data) + self.assertIn(b'Allow: /', result.data) def test_opensearch_xml(self): result = self.app.get('/opensearch.xml') self.assertEqual(result.status_code, 200) - self.assertIn('<Description>a privacy-respecting, hackable metasearch engine</Description>', result.data) + self.assertIn(b'<Description>a privacy-respecting, hackable metasearch engine</Description>', result.data) def test_favicon(self): result = self.app.get('/favicon.ico') |