summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdam Tauber <asciimoo@gmail.com>2016-11-30 18:43:03 +0100
committerAdam Tauber <asciimoo@gmail.com>2017-05-15 12:02:30 +0200
commit52e615dede8538c36f569d2cf07835427a9a0db6 (patch)
treeac65990c72156def2d49e81d981f0b3beda4fd2e
parent46a2c63f8e1c3819cceff2d61fe9106051e8ecee (diff)
downloadsearxng-52e615dede8538c36f569d2cf07835427a9a0db6.tar.gz
searxng-52e615dede8538c36f569d2cf07835427a9a0db6.zip
[enh] py3 compatibility
-rw-r--r--.travis.yml5
-rw-r--r--requirements-dev.txt3
-rw-r--r--searx/answerers/__init__.py12
-rw-r--r--searx/answerers/random/answerer.py13
-rw-r--r--searx/answerers/statistics/answerer.py16
-rw-r--r--searx/autocomplete.py6
-rw-r--r--searx/engines/1337x.py3
-rw-r--r--searx/engines/__init__.py5
-rw-r--r--searx/engines/archlinux.py3
-rwxr-xr-xsearx/engines/base.py6
-rw-r--r--searx/engines/bing.py2
-rw-r--r--searx/engines/bing_images.py2
-rw-r--r--searx/engines/bing_news.py5
-rw-r--r--searx/engines/blekko_images.py2
-rw-r--r--searx/engines/btdigg.py5
-rw-r--r--searx/engines/currency_convert.py14
-rw-r--r--searx/engines/dailymotion.py3
-rw-r--r--searx/engines/deezer.py5
-rw-r--r--searx/engines/deviantart.py2
-rw-r--r--searx/engines/dictzone.py6
-rw-r--r--searx/engines/digbt.py8
-rw-r--r--searx/engines/digg.py4
-rw-r--r--searx/engines/doku.py2
-rw-r--r--searx/engines/duckduckgo.py2
-rw-r--r--searx/engines/duckduckgo_definitions.py6
-rw-r--r--searx/engines/faroo.py2
-rw-r--r--searx/engines/fdroid.py7
-rw-r--r--searx/engines/filecrop.py11
-rw-r--r--searx/engines/flickr.py2
-rw-r--r--searx/engines/flickr_noapi.py2
-rw-r--r--searx/engines/framalibre.py4
-rw-r--r--searx/engines/frinkiac.py2
-rw-r--r--searx/engines/gigablast.py3
-rw-r--r--searx/engines/github.py2
-rw-r--r--searx/engines/google.py5
-rw-r--r--searx/engines/google_images.py2
-rw-r--r--searx/engines/google_news.py3
-rw-r--r--searx/engines/ina.py10
-rw-r--r--searx/engines/json_engine.py11
-rw-r--r--searx/engines/kickass.py3
-rw-r--r--searx/engines/mediawiki.py2
-rw-r--r--searx/engines/mixcloud.py2
-rw-r--r--searx/engines/nyaa.py2
-rw-r--r--searx/engines/openstreetmap.py4
-rw-r--r--searx/engines/photon.py2
-rw-r--r--searx/engines/piratebay.py3
-rw-r--r--searx/engines/qwant.py3
-rw-r--r--searx/engines/reddit.py6
-rw-r--r--searx/engines/scanr_structures.py4
-rw-r--r--searx/engines/searchcode_code.py5
-rw-r--r--searx/engines/searchcode_doc.py5
-rw-r--r--searx/engines/seedpeer.py4
-rw-r--r--searx/engines/soundcloud.py19
-rw-r--r--searx/engines/spotify.py5
-rw-r--r--searx/engines/stackoverflow.py6
-rw-r--r--searx/engines/startpage.py2
-rw-r--r--searx/engines/subtitleseeker.py2
-rw-r--r--searx/engines/swisscows.py27
-rw-r--r--searx/engines/tokyotoshokan.py11
-rw-r--r--searx/engines/torrentz.py8
-rw-r--r--searx/engines/translated.py4
-rw-r--r--searx/engines/twitter.py3
-rw-r--r--searx/engines/vimeo.py2
-rw-r--r--searx/engines/wikidata.py13
-rw-r--r--searx/engines/wikipedia.py21
-rw-r--r--searx/engines/wolframalpha_api.py13
-rw-r--r--searx/engines/wolframalpha_noapi.py9
-rw-r--r--searx/engines/www1x.py6
-rw-r--r--searx/engines/www500px.py3
-rw-r--r--searx/engines/xpath.py4
-rw-r--r--searx/engines/yacy.py2
-rw-r--r--searx/engines/yahoo.py3
-rw-r--r--searx/engines/yahoo_news.py6
-rw-r--r--searx/engines/yandex.py4
-rw-r--r--searx/engines/youtube_api.py2
-rw-r--r--searx/engines/youtube_noapi.py2
-rw-r--r--searx/plugins/__init__.py5
-rw-r--r--searx/plugins/doai_rewrite.py2
-rw-r--r--searx/plugins/https_rewrite.py5
-rw-r--r--searx/plugins/self_info.py4
-rw-r--r--searx/plugins/tracker_url_remover.py2
-rw-r--r--searx/preferences.py18
-rw-r--r--searx/query.py8
-rw-r--r--searx/results.py6
-rw-r--r--searx/search.py12
-rw-r--r--searx/settings_robot.yml2
-rw-r--r--searx/templates/courgette/404.html2
-rw-r--r--searx/templates/legacy/404.html2
-rw-r--r--searx/templates/oscar/404.html2
-rw-r--r--searx/templates/pix-art/404.html2
-rw-r--r--searx/testing.py42
-rw-r--r--searx/url_utils.py28
-rw-r--r--searx/utils.py26
-rw-r--r--searx/webapp.py36
-rw-r--r--tests/robot/__init__.py75
-rw-r--r--tests/robot/test_basic.robot153
-rw-r--r--tests/unit/engines/test_archlinux.py4
-rw-r--r--tests/unit/engines/test_bing.py6
-rw-r--r--tests/unit/engines/test_bing_news.py12
-rw-r--r--tests/unit/engines/test_btdigg.py12
-rw-r--r--tests/unit/engines/test_currency_convert.py4
-rw-r--r--tests/unit/engines/test_digbt.py4
-rw-r--r--tests/unit/engines/test_duckduckgo.py3
-rw-r--r--tests/unit/engines/test_frinkiac.py5
-rw-r--r--tests/unit/engines/test_gigablast.py1
-rw-r--r--tests/unit/engines/test_soundcloud.py2
-rw-r--r--tests/unit/engines/test_startpage.py6
-rw-r--r--tests/unit/engines/test_swisscows.py8
-rw-r--r--tests/unit/engines/test_tokyotoshokan.py2
-rw-r--r--tests/unit/engines/test_wikidata.py3
-rw-r--r--tests/unit/engines/test_wikipedia.py18
-rw-r--r--tests/unit/engines/test_wolframalpha_api.py10
-rw-r--r--tests/unit/test_plugins.py16
-rw-r--r--tests/unit/test_utils.py8
-rw-r--r--tests/unit/test_webapp.py46
115 files changed, 517 insertions, 513 deletions
diff --git a/.travis.yml b/.travis.yml
index 0a174ff66..b6017cd93 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,6 +9,7 @@ addons:
language: python
python:
- "2.7"
+ - "3.6"
before_install:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@@ -24,9 +25,9 @@ script:
- ./manage.sh styles
- ./manage.sh grunt_build
- ./manage.sh tests
- - ./manage.sh py_test_coverage
after_success:
- coveralls
+ - ./manage.sh py_test_coverage
+ - coveralls
notifications:
irc:
channels:
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 01d1e1497..691a1e7ba 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -3,8 +3,7 @@ mock==2.0.0
nose2[coverage-plugin]
pep8==1.7.0
plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
transifex-client==0.12.2
unittest2==1.1.0
zope.testrunner==4.5.1
diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py
index 8f5951c75..444316f11 100644
--- a/searx/answerers/__init__.py
+++ b/searx/answerers/__init__.py
@@ -1,8 +1,12 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
+from sys import version_info
from searx.utils import load_module
from collections import defaultdict
+if version_info[0] == 3:
+ unicode = str
+
answerers_dir = dirname(realpath(__file__))
@@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
def load_answerers():
answerers = []
for filename in listdir(answerers_dir):
- if not isdir(join(answerers_dir, filename)):
+ if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
continue
module = load_module('answerer.py', join(answerers_dir, filename))
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
def ask(query):
results = []
- query_parts = filter(None, query.query.split())
+ query_parts = list(filter(None, query.query.split()))
- if query_parts[0] not in answerers_by_keywords:
+ if query_parts[0].decode('utf-8') not in answerers_by_keywords:
return results
- for answerer in answerers_by_keywords[query_parts[0]]:
+ for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
result = answerer(query)
if result:
results.append(result)
diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py
index 510d9f5be..f2b8bf3e5 100644
--- a/searx/answerers/random/answerer.py
+++ b/searx/answerers/random/answerer.py
@@ -1,5 +1,6 @@
import random
import string
+import sys
from flask_babel import gettext
# required answerer attribute
@@ -8,7 +9,11 @@ keywords = ('random',)
random_int_max = 2**31
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+ random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+ unicode = str
+ random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_string():
@@ -24,9 +29,9 @@ def random_int():
return unicode(random.randint(-random_int_max, random_int_max))
-random_types = {u'string': random_string,
- u'int': random_int,
- u'float': random_float}
+random_types = {b'string': random_string,
+ b'int': random_int,
+ b'float': random_float}
# required answerer function
diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py
index a04695f56..73dd25cfd 100644
--- a/searx/answerers/statistics/answerer.py
+++ b/searx/answerers/statistics/answerer.py
@@ -1,8 +1,12 @@
+from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
+if version_info[0] == 3:
+ unicode = str
+
keywords = ('min',
'max',
'avg',
@@ -19,22 +23,22 @@ def answer(query):
return []
try:
- args = map(float, parts[1:])
+ args = list(map(float, parts[1:]))
except:
return []
func = parts[0]
answer = None
- if func == 'min':
+ if func == b'min':
answer = min(args)
- elif func == 'max':
+ elif func == b'max':
answer = max(args)
- elif func == 'avg':
+ elif func == b'avg':
answer = sum(args) / len(args)
- elif func == 'sum':
+ elif func == b'sum':
answer = sum(args)
- elif func == 'prod':
+ elif func == b'prod':
answer = reduce(mul, args, 1)
if answer is None:
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index b360af9f6..de0623a8a 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from lxml import etree
from json import loads
-from urllib import urlencode
from searx import settings
from searx.languages import language_codes
from searx.engines import (
@@ -26,6 +25,11 @@ from searx.engines import (
)
from searx.poolrequests import get as http_get
+try:
+ from urllib import urlencode
+except:
+ from urllib.parse import urlencode
+
def get(*args, **kwargs):
if 'timeout' not in kwargs:
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py
index c6bc3cb6d..0de04bd95 100644
--- a/searx/engines/1337x.py
+++ b/searx/engines/1337x.py
@@ -1,8 +1,7 @@
-from urllib import quote
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 77184a282..023ec409a 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -72,12 +72,11 @@ def load_engine(engine_data):
if engine_data['categories'] == 'none':
engine.categories = []
else:
- engine.categories = map(
- str.strip, engine_data['categories'].split(','))
+ engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
continue
setattr(engine, param_name, engine_data[param_name])
- for arg_name, arg_value in engine_default_args.iteritems():
+ for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index dca825790..cad06f8c6 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -11,10 +11,9 @@
@parse url, title
"""
-from urlparse import urljoin
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
diff --git a/searx/engines/base.py b/searx/engines/base.py
index a552453ce..ff006a3bc 100755
--- a/searx/engines/base.py
+++ b/searx/engines/base.py
@@ -14,10 +14,10 @@
"""
from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
from datetime import datetime
import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
categories = ['science']
@@ -73,7 +73,7 @@ def request(query, params):
def response(resp):
results = []
- search_results = etree.XML(resp.content)
+ search_results = etree.XML(resp.text)
for entry in search_results.xpath('./result/doc'):
content = "No description available"
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 4e7ead82d..052d567ea 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -13,9 +13,9 @@
@todo publishedDate
"""
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['general']
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 97f6dca37..e79740e50 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -15,11 +15,11 @@
limited response to 10 images
"""
-from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
# engine dependent config
categories = ['images']
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index 765bcd38e..8e3cc517e 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -11,13 +11,12 @@
@parse url, title, content, publishedDate, thumbnail
"""
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
# engine dependent config
categories = ['news']
@@ -86,7 +85,7 @@ def request(query, params):
def response(resp):
results = []
- rss = etree.fromstring(resp.content)
+ rss = etree.fromstring(resp.text)
ns = rss.nsmap
diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py
index c0664f390..f71645634 100644
--- a/searx/engines/blekko_images.py
+++ b/searx/engines/blekko_images.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
# engine dependent config
categories = ['images']
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 33c8355de..40438673f 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -10,11 +10,10 @@
@parse url, title, content, seed, leech, magnetlink
"""
-from urlparse import urljoin
-from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size
# engine dependent config
@@ -38,7 +37,7 @@ def request(query, params):
def response(resp):
results = []
- dom = html.fromstring(resp.content)
+ dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr')
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index bc839cfb5..1218d4849 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -1,21 +1,25 @@
-from datetime import datetime
+import json
import re
import os
-import json
+import sys
import unicodedata
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+ unicode = str
categories = []
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
- name = name.lower().replace('-', ' ').rstrip('s')
+ name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
@@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
def request(query, params):
- m = parser_re.match(unicode(query, 'utf8'))
+ m = parser_re.match(query)
if not m:
# wrong query
return params
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 8c69aafe0..fad7e596c 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -12,10 +12,9 @@
@todo set content-parameter with correct data
"""
-from urllib import urlencode
from json import loads
from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 3db1af3d2..af63478fb 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
def request(query, params):
offset = (params['pageno'] - 1) * 25
- params['url'] = search_url.format(query=urlencode({'q': query}),
- offset=offset)
+ params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index a24b75b8a..bb85c6dc5 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -12,10 +12,10 @@
@todo rewrite to api
"""
-from urllib import urlencode
from lxml import html
import re
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['images']
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 20a9a8980..7c3478629 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -10,20 +10,20 @@
"""
import re
-from urlparse import urljoin
from lxml import html
from searx.utils import is_valid_lang
+from searx.url_utils import urljoin
categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
-parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
def request(query, params):
- m = parser_re.match(unicode(query, 'utf8'))
+ m = parser_re.match(query)
if not m:
return params
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index b55d7747a..ff2f94593 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -10,10 +10,14 @@
@parse url, title, content, magnetlink
"""
-from urlparse import urljoin
+from sys import version_info
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
+from searx.url_utils import urljoin
+
+if version_info[0] == 3:
+ unicode = str
categories = ['videos', 'music', 'files']
paging = True
@@ -31,7 +35,7 @@ def request(query, params):
def response(resp):
- dom = html.fromstring(resp.content)
+ dom = html.fromstring(resp.text)
search_res = dom.xpath('.//td[@class="x-item"]')
if not search_res:
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 238b466a0..606747a4d 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, thumbnail
"""
-from urllib import quote_plus
+from dateutil import parser
from json import loads
from lxml import html
-from dateutil import parser
+from searx.url_utils import quote_plus
# engine dependent config
categories = ['news', 'social media']
diff --git a/searx/engines/doku.py b/searx/engines/doku.py
index 93867fd0d..a391be444 100644
--- a/searx/engines/doku.py
+++ b/searx/engines/doku.py
@@ -9,9 +9,9 @@
# @stable yes
# @parse (general) url, title, content
-from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 1ae484123..1872ab7d4 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -13,11 +13,11 @@
@todo rewrite to api
"""
-from urllib import urlencode
from lxml.html import fromstring
from requests import get
from json import loads
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['general']
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index dd3f12e1e..21c6a6578 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -1,10 +1,10 @@
import json
-from urllib import urlencode
-from re import compile, sub
from lxml import html
-from searx.utils import html_to_text
+from re import compile
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
+from searx.utils import html_to_text
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'
diff --git a/searx/engines/faroo.py b/searx/engines/faroo.py
index 9fa244e77..e24d1b7dc 100644
--- a/searx/engines/faroo.py
+++ b/searx/engines/faroo.py
@@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, img_src
"""
-from urllib import urlencode
from json import loads
import datetime
from searx.utils import searx_useragent
+from searx.url_utils import urlencode
# engine dependent config
categories = ['general', 'news']
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index 6d470a4eb..a6b01a8ee 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -9,9 +9,9 @@
@parse url, title, content
"""
-from urllib import urlencode
-from searx.engines.xpath import extract_text
from lxml import html
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['files']
@@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
# do search-request
def request(query, params):
- query = urlencode({'fdfilter': query,
- 'fdpage': params['pageno']})
+ query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
params['url'] = search_url.format(query=query)
return params
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
index 71665bd4e..ed57a6bf3 100644
--- a/searx/engines/filecrop.py
+++ b/searx/engines/filecrop.py
@@ -1,5 +1,9 @@
-from urllib import urlencode
-from HTMLParser import HTMLParser
+from searx.url_utils import urlencode
+
+try:
+ from HTMLParser import HTMLParser
+except:
+ from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
@@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
def request(query, params):
index = 1 + (params['pageno'] - 1) * 30
- params['url'] = search_url.format(query=urlencode({'w': query}),
- index=index)
+ params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
return params
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 5ce1160e9..de1769370 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -13,8 +13,8 @@
More info on api-key : https://www.flickr.com/services/apps/create/
"""
-from urllib import urlencode
from json import loads
+from searx.url_utils import urlencode
categories = ['images']
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 3c0ec7b70..08f07f7ce 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -12,11 +12,11 @@
@parse url, title, thumbnail, img_src
"""
-from urllib import urlencode
from json import loads
from time import time
import re
from searx.engines import logger
+from searx.url_utils import urlencode
logger = logger.getChild('flickr-noapi')
diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py
index e8d1d8aa7..f2eecdc73 100644
--- a/searx/engines/framalibre.py
+++ b/searx/engines/framalibre.py
@@ -10,12 +10,10 @@
@parse url, title, content, thumbnail, img_src
"""
-from urlparse import urljoin
from cgi import escape
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from dateutil import parser
+from searx.url_utils import urljoin, urlencode
# engine dependent config
categories = ['it']
diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py
index a9383f862..a67b42dbe 100644
--- a/searx/engines/frinkiac.py
+++ b/searx/engines/frinkiac.py
@@ -10,7 +10,7 @@ Frinkiac (Images)
"""
from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
categories = ['images']
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 0c1d7f613..37933c69b 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -11,10 +11,9 @@
"""
from json import loads
-from random import randint
from time import time
-from urllib import urlencode
from lxml.html import fromstring
+from searx.url_utils import urlencode
# engine dependent config
categories = ['general']
diff --git a/searx/engines/github.py b/searx/engines/github.py
index 7adef3be9..eaa00da4f 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -10,8 +10,8 @@
@parse url, title, content
"""
-from urllib import urlencode
from json import loads
+from searx.url_utils import urlencode
# engine dependent config
categories = ['it']
diff --git a/searx/engines/google.py b/searx/engines/google.py
index e14e9e702..934f5c29a 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -9,11 +9,10 @@
# @parse url, title, content, suggestion
import re
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode, urlparse, parse_qsl
logger = logger.getChild('google engine')
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 9a3c71c7e..9692f4b82 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -11,9 +11,9 @@
"""
from datetime import date, timedelta
-from urllib import urlencode
from json import loads
from lxml import html
+from searx.url_utils import urlencode
# engine dependent config
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index 6b79ff5c8..7344b5289 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -11,9 +11,8 @@
"""
from lxml import html
-from urllib import urlencode
-from json import loads
from searx.engines.google import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
# search-url
categories = ['news']
diff --git a/searx/engines/ina.py b/searx/engines/ina.py
index 86a39782b..37a05f099 100644
--- a/searx/engines/ina.py
+++ b/searx/engines/ina.py
@@ -12,11 +12,15 @@
# @todo embedded (needs some md5 from video page)
from json import loads
-from urllib import urlencode
from lxml import html
-from HTMLParser import HTMLParser
-from searx.engines.xpath import extract_text
from dateutil import parser
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
+
+try:
+ from HTMLParser import HTMLParser
+except:
+ from html.parser import HTMLParser
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 4604c3cac..67d6a5a65 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -1,11 +1,16 @@
-from urllib import urlencode
-from json import loads
from collections import Iterable
+from json import loads
+from sys import version_info
+from searx.url_utils import urlencode
+
+if version_info[0] == 3:
+ unicode = str
search_url = None
url_query = None
content_query = None
title_query = None
+paging = False
suggestion_query = ''
results_query = ''
@@ -20,7 +25,7 @@ first_page_num = 1
def iterate(iterable):
if type(iterable) == dict:
- it = iterable.iteritems()
+ it = iterable.items()
else:
it = enumerate(iterable)
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 059fa2a66..5e897c96f 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -10,12 +10,11 @@
@parse url, title, content, seed, leech, magnetlink
"""
-from urlparse import urljoin
-from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int
+from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 93d98d3aa..5a70204b1 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -14,7 +14,7 @@
from json import loads
from string import Formatter
-from urllib import urlencode, quote
+from searx.url_utils import urlencode, quote
# engine dependent config
categories = ['general']
diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py
index 312d297eb..470c007ea 100644
--- a/searx/engines/mixcloud.py
+++ b/searx/engines/mixcloud.py
@@ -11,8 +11,8 @@
"""
from json import loads
-from urllib import urlencode
from dateutil import parser
+from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py
index 4ca5b3171..272c712c4 100644
--- a/searx/engines/nyaa.py
+++ b/searx/engines/nyaa.py
@@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile
"""
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'images', 'videos', 'music']
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 01ca7d42d..733ba6203 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -11,7 +11,6 @@
"""
from json import loads
-from searx.utils import searx_useragent
# engine dependent config
categories = ['map']
@@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
- # using searx User-Agent
- params['headers']['User-Agent'] = searx_useragent()
-
return params
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index a029bbfef..15236f680 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -10,9 +10,9 @@
@parse url, title
"""
-from urllib import urlencode
from json import loads
from searx.utils import searx_useragent
+from searx.url_utils import urlencode
# engine dependent config
categories = ['map']
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index ca21a3bb2..a5af8d824 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -8,11 +8,10 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
-from urlparse import urljoin
-from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 1fc4630fa..cb097eb38 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -12,9 +12,8 @@
from datetime import datetime
from json import loads
-from urllib import urlencode
-
from searx.utils import html_to_text
+from searx.url_utils import urlencode
# engine dependent config
categories = None
diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py
index b29792a3a..d19724906 100644
--- a/searx/engines/reddit.py
+++ b/searx/engines/reddit.py
@@ -11,9 +11,8 @@
"""
import json
-from urllib import urlencode
-from urlparse import urlparse, urljoin
from datetime import datetime
+from searx.url_utils import urlencode, urljoin, urlparse
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
@@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
# do search-request
def request(query, params):
- query = urlencode({'q': query,
- 'limit': page_size})
+ query = urlencode({'q': query, 'limit': page_size})
params['url'] = search_url.format(query=query)
return params
diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py
index ad78155ac..72fd2b3c9 100644
--- a/searx/engines/scanr_structures.py
+++ b/searx/engines/scanr_structures.py
@@ -10,9 +10,7 @@
@parse url, title, content, img_src
"""
-from urllib import urlencode
from json import loads, dumps
-from dateutil import parser
from searx.utils import html_to_text
# engine dependent config
@@ -48,7 +46,7 @@ def response(resp):
search_res = loads(resp.text)
# return empty array if there are no results
- if search_res.get('total') < 1:
+ if search_res.get('total', 0) < 1:
return []
# parse results
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index be7a6d385..789e8e7a9 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -10,8 +10,8 @@
@parse url, title, content
"""
-from urllib import urlencode
from json import loads
+from searx.url_utils import urlencode
# engine dependent config
@@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
# do search-request
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'q': query}),
- pageno=params['pageno'] - 1)
+ params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index 99e10be62..4b8e9a84a 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -10,8 +10,8 @@
@parse url, title, content
"""
-from urllib import urlencode
from json import loads
+from searx.url_utils import urlencode
# engine dependent config
categories = ['it']
@@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
# do search-request
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'q': query}),
- pageno=params['pageno'] - 1)
+ params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
index e1309a9b5..3770dacac 100644
--- a/searx/engines/seedpeer.py
+++ b/searx/engines/seedpeer.py
@@ -8,11 +8,9 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
-from urlparse import urljoin
-from urllib import quote
from lxml import html
from operator import itemgetter
-from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
url = 'http://www.seedpeer.eu/'
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 62b03ac03..41b40da61 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -11,13 +11,17 @@
"""
import re
-from StringIO import StringIO
from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+ from cStringIO import StringIO
+except:
+ from io import StringIO
# engine dependent config
categories = ['music']
@@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'scrolling="no" frameborder="no" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
def get_client_id():
response = http_get("https://soundcloud.com")
- rx_namespace = {"re": "http://exslt.org/regular-expressions"}
if response.ok:
- tree = etree.parse(StringIO(response.content), etree.HTMLParser())
- script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+ tree = html.fromstring(response.content)
+ script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content
@@ -51,7 +56,7 @@ def get_client_id():
# gets app_js and searches for the clientid
response = http_get(app_js_url)
if response.ok:
- cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+ cids = cid_re.search(response.text)
if cids is not None and len(cids.groups()):
return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index 249ba91ef..aed756be3 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
def request(query, params):
offset = (params['pageno'] - 1) * 20
- params['url'] = search_url.format(query=urlencode({'q': query}),
- offset=offset)
+ params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index 5e7ab2901..25875aa15 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -10,10 +10,9 @@
@parse url, title, content
"""
-from urlparse import urljoin
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
@@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
# do search-request
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'q': query}),
- pageno=params['pageno'])
+ params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
return params
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index 54aafdee5..314b7b9a8 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -56,7 +56,7 @@ def request(query, params):
def response(resp):
results = []
- dom = html.fromstring(resp.content)
+ dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 77b010c3f..2cbc991b3 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -10,10 +10,10 @@
@parse url, title, content
"""
-from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
+from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index dd398857f..e9c13ca24 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -11,9 +11,9 @@
"""
from json import loads
-from urllib import urlencode, unquote
import re
from lxml.html import fromstring
+from searx.url_utils import unquote, urlencode
# engine dependent config
categories = ['general', 'images']
@@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
-regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(r'^initialData:\s*')
-regex_json_remove_end = re.compile(r',\s*environment$')
-regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(b'^initialData:\s*')
+regex_json_remove_end = re.compile(b',\s*environment$')
+regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
# do search-request
@@ -45,10 +45,9 @@ def request(query, params):
ui_language = params['language'].split('-')[0]
search_path = search_string.format(
- query=urlencode({'query': query,
- 'uiLanguage': ui_language,
- 'region': region}),
- page=params['pageno'])
+ query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
+ page=params['pageno']
+ )
# image search query is something like 'image?{query}&page={page}'
if params['category'] == 'images':
@@ -63,14 +62,14 @@ def request(query, params):
def response(resp):
results = []
- json_regex = regex_json.search(resp.content)
+ json_regex = regex_json.search(resp.text)
# check if results are returned
if not json_regex:
return []
- json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
- json = loads(json_raw)
+ json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
+ json = loads(json_raw.decode('utf-8'))
# parse results
for result in json['Results'].get('items', []):
@@ -78,7 +77,7 @@ def response(resp):
# parse image results
if result.get('ContentType', '').startswith('image'):
- img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+ img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result
results.append({'url': result['SourceUrl'],
@@ -100,7 +99,7 @@ def response(resp):
# parse images
for result in json.get('Images', []):
# decode image url
- img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+ img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result
results.append({'url': result['SourceUrl'],
diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py
index 52b2cbe07..9a6b5e57d 100644
--- a/searx/engines/tokyotoshokan.py
+++ b/searx/engines/tokyotoshokan.py
@@ -11,11 +11,11 @@
"""
import re
-from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'videos', 'music']
@@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
# do search-request
def request(query, params):
- query = urlencode({'page': params['pageno'],
- 'terms': query})
+ query = urlencode({'page': params['pageno'], 'terms': query})
params['url'] = search_url.format(query=query)
return params
@@ -50,7 +49,7 @@ def response(resp):
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
# processing the results, two rows at a time
- for i in xrange(0, len(rows), 2):
+ for i in range(0, len(rows), 2):
# parse the first row
name_row = rows[i]
@@ -79,14 +78,14 @@ def response(resp):
groups = size_re.match(item).groups()
multiplier = get_filesize_mul(groups[1])
params['filesize'] = int(multiplier * float(groups[0]))
- except Exception as e:
+ except:
pass
elif item.startswith('Date:'):
try:
# Date: 2016-02-21 21:44 UTC
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
params['publishedDate'] = date
- except Exception as e:
+ except:
pass
elif item.startswith('Comment:'):
params['content'] = item
diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py
index f9c832651..dda56fc22 100644
--- a/searx/engines/torrentz.py
+++ b/searx/engines/torrentz.py
@@ -12,11 +12,11 @@
"""
import re
-from urllib import urlencode
from lxml import html
-from searx.engines.xpath import extract_text
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'videos', 'music']
@@ -70,7 +70,7 @@ def response(resp):
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
size, suffix = size_str.split()
params['filesize'] = int(size) * get_filesize_mul(suffix)
- except Exception as e:
+ except:
pass
# does our link contain a valid SHA1 sum?
@@ -84,7 +84,7 @@ def response(resp):
# Fri, 25 Mar 2016 16:29:01
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
params['publishedDate'] = date
- except Exception as e:
+ except:
pass
results.append(params)
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index e78db0d8e..5c7b17033 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -9,8 +9,12 @@
@parse url, title, content
"""
import re
+from sys import version_info
from searx.utils import is_valid_lang
+if version_info[0] == 3:
+ unicode = str
+
categories = ['general']
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 6cca05f70..038cef47f 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -12,11 +12,10 @@
@todo publishedDate
"""
-from urlparse import urljoin
-from urllib import urlencode
from lxml import html
from datetime import datetime
from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['social media']
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index 5d5310544..1408be8df 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -13,8 +13,8 @@
# @todo set content-parameter with correct data
from json import loads
-from urllib import urlencode
from dateutil import parser
+from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 3f849bc7d..be217463c 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -14,12 +14,11 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
-from searx.utils import format_date_by_locale
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
from json import loads
from lxml.html import fromstring
-from urllib import urlencode
logger = logger.getChild('wikidata')
result_count = 1
@@ -62,14 +61,13 @@ def request(query, params):
language = 'en'
params['url'] = url_search.format(
- query=urlencode({'label': query,
- 'language': language}))
+ query=urlencode({'label': query, 'language': language}))
return params
def response(resp):
results = []
- html = fromstring(resp.content)
+ html = fromstring(resp.text)
wikidata_ids = html.xpath(wikidata_ids_xpath)
language = resp.search_params['language'].split('-')[0]
@@ -78,10 +76,9 @@ def response(resp):
# TODO: make requests asynchronous to avoid timeout when result_count > 1
for wikidata_id in wikidata_ids[:result_count]:
- url = url_detail.format(query=urlencode({'page': wikidata_id,
- 'uselang': language}))
+ url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
htmlresponse = get(url)
- jsonresponse = loads(htmlresponse.content)
+ jsonresponse = loads(htmlresponse.text)
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
return results
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index 3af8f1c71..db2fdc000 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -11,13 +11,12 @@
"""
from json import loads
-from urllib import urlencode, quote
from lxml.html import fromstring
-
+from searx.url_utils import quote, urlencode
# search-url
-base_url = 'https://{language}.wikipedia.org/'
-search_postfix = 'w/api.php?'\
+base_url = u'https://{language}.wikipedia.org/'
+search_url = base_url + u'w/api.php?'\
'action=query'\
'&format=json'\
'&{query}'\
@@ -37,16 +36,16 @@ def url_lang(lang):
else:
language = lang
- return base_url.format(language=language)
+ return language
# do search-request
def request(query, params):
if query.islower():
- query += '|' + query.title()
+ query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
- params['url'] = url_lang(params['language']) \
- + search_postfix.format(query=urlencode({'titles': query}))
+ params['url'] = search_url.format(query=urlencode({'titles': query}),
+ language=url_lang(params['language']))
return params
@@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
def response(resp):
results = []
- search_result = loads(resp.content)
+ search_result = loads(resp.text)
# wikipedia article's unique id
# first valid id is assumed to be the requested article
@@ -99,11 +98,9 @@ def response(resp):
extract = page.get('extract')
summary = extract_first_paragraph(extract, title, image)
- if not summary:
- return []
# link to wikipedia article
- wikipedia_link = url_lang(resp.search_params['language']) \
+ wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
results.append({'url': wikipedia_link, 'title': title})
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index e743c8f56..595c6b7de 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -8,8 +8,8 @@
# @stable yes
# @parse url, infobox
-from urllib import urlencode
from lxml import etree
+from searx.url_utils import urlencode
# search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
# do search-request
def request(query, params):
- params['url'] = search_url.format(query=urlencode({'input': query}),
- api_key=api_key)
+ params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
return params
@@ -56,7 +55,7 @@ def replace_pua_chars(text):
u'\uf74e': 'i', # imaginary number
u'\uf7d9': '='} # equals sign
- for k, v in pua_chars.iteritems():
+ for k, v in pua_chars.items():
text = text.replace(k, v)
return text
@@ -66,7 +65,7 @@ def replace_pua_chars(text):
def response(resp):
results = []
- search_results = etree.XML(resp.content)
+ search_results = etree.XML(resp.text)
# return empty array if there are no results
if search_results.xpath(failure_xpath):
@@ -120,10 +119,10 @@ def response(resp):
# append infobox
results.append({'infobox': infobox_title,
'attributes': result_chunks,
- 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+ 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
# append link to site
- results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+ results.append({'url': resp.request.headers['Referer'],
'title': title,
'content': result_content})
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 1534501b3..2a8642f92 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -10,10 +10,9 @@
from json import loads
from time import time
-from urllib import urlencode
-from lxml.etree import XML
from searx.poolrequests import get as http_get
+from searx.url_utils import urlencode
# search-url
url = 'https://www.wolframalpha.com/'
@@ -62,7 +61,7 @@ obtain_token()
# do search-request
def request(query, params):
# obtain token if last update was more than an hour
- if time() - token['last_updated'] > 3600:
+ if time() - (token['last_updated'] or 0) > 3600:
obtain_token()
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@@ -112,9 +111,9 @@ def response(resp):
results.append({'infobox': infobox_title,
'attributes': result_chunks,
- 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+ 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
- results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+ results.append({'url': resp.request.headers['Referer'],
'title': 'Wolfram|Alpha (' + infobox_title + ')',
'content': result_content})
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index 1269a5422..508803240 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -10,11 +10,9 @@
@parse url, title, thumbnail, img_src, content
"""
-from urllib import urlencode
-from urlparse import urljoin
from lxml import html
-import string
import re
+from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['images']
@@ -55,7 +53,7 @@ def response(resp):
cur_element += result_part
# fix xml-error
- cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+ cur_element = cur_element.replace('"></a>', '"/></a>')
dom = html.fromstring(cur_element)
link = dom.xpath('//a')[0]
diff --git a/searx/engines/www500px.py b/searx/engines/www500px.py
index 546521ba3..7a2015ae9 100644
--- a/searx/engines/www500px.py
+++ b/searx/engines/www500px.py
@@ -13,8 +13,7 @@
"""
from json import loads
-from urllib import urlencode
-from urlparse import urljoin
+from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['images']
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index 0d39b28a8..f466697bd 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -1,13 +1,13 @@
from lxml import html
-from urllib import urlencode, unquote
-from urlparse import urlparse, urljoin
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text
+from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None
url_xpath = None
content_xpath = None
title_xpath = None
+paging = False
suggestion_xpath = ''
results_xpath = ''
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index 7b1b6b35d..a62a1296e 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -13,8 +13,8 @@
# @todo parse video, audio and file results
from json import loads
-from urllib import urlencode
from dateutil import parser
+from searx.url_utils import urlencode
from searx.utils import html_to_text
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index 5c62c2ed8..5387aaf54 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -11,10 +11,9 @@
@parse url, title, content, suggestion
"""
-from urllib import urlencode
-from urlparse import unquote
from lxml import html
from searx.engines.xpath import extract_text, extract_url
+from searx.url_utils import unquote, urlencode
# engine dependent config
categories = ['general']
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 1a0fd28f5..ae54a4acd 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -9,13 +9,13 @@
# @stable no (HTML can change)
# @parse url, title, content, publishedDate
-from urllib import urlencode
+import re
+from datetime import datetime, timedelta
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
-from datetime import datetime, timedelta
-import re
from dateutil import parser
+from searx.url_utils import urlencode
# engine dependent config
categories = ['news']
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index 65aee28b8..1c789f6cb 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -9,9 +9,9 @@
@parse url, title, content
"""
-from urllib import urlencode
from lxml import html
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode
logger = logger.getChild('yandex engine')
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index 1dfca5166..6de18aa2c 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -9,8 +9,8 @@
# @parse url, title, content, publishedDate, thumbnail, embedded
from json import loads
-from urllib import urlencode
from dateutil import parser
+from searx.url_utils import urlencode
# engine dependent config
categories = ['videos', 'music']
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 9b7ca64c8..9f01841f6 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -8,10 +8,10 @@
# @stable no
# @parse url, title, content, publishedDate, thumbnail, embedded
-from urllib import quote_plus
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import list_get
+from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos', 'music']
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
index 011d36260..46c1f8918 100644
--- a/searx/plugins/__init__.py
+++ b/searx/plugins/__init__.py
@@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
-from sys import exit
+from sys import exit, version_info
from searx import logger
+if version_info[0] == 3:
+ unicode = str
+
logger = logger.getChild('plugins')
from searx.plugins import (doai_rewrite,
diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py
index a6e15ae5a..95efa8f9b 100644
--- a/searx/plugins/doai_rewrite.py
+++ b/searx/plugins/doai_rewrite.py
@@ -1,6 +1,6 @@
from flask_babel import gettext
import re
-from urlparse import urlparse, parse_qsl
+from searx.url_utils import urlparse, parse_qsl
regex = re.compile(r'10\.\d{4,9}/[^\s]+')
diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py
index 8b4c9784e..4462c86bc 100644
--- a/searx/plugins/https_rewrite.py
+++ b/searx/plugins/https_rewrite.py
@@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
import re
-from urlparse import urlparse
+import sys
from lxml import etree
from os import listdir, environ
from os.path import isfile, isdir, join
from searx.plugins import logger
from flask_babel import gettext
from searx import searx_dir
+from searx.url_utils import urlparse
+if sys.version_info[0] == 3:
+ unicode = str
name = "HTTPS rewrite"
description = gettext('Rewrite HTTP links to HTTPS if possible')
diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py
index a2aeda98e..8d6c661ad 100644
--- a/searx/plugins/self_info.py
+++ b/searx/plugins/self_info.py
@@ -22,7 +22,7 @@ default_on = True
# Self User Agent regex
-p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
# attach callback to the post search hook
@@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search):
if search.search_query.pageno > 1:
return True
- if search.search_query.query == 'ip':
+ if search.search_query.query == b'ip':
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
if x_forwarded_for:
ip = x_forwarded_for[0]
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
index 68a004e33..a84012828 100644
--- a/searx/plugins/tracker_url_remover.py
+++ b/searx/plugins/tracker_url_remover.py
@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask_babel import gettext
import re
-from urlparse import urlunparse
+from searx.url_utils import urlunparse
regexes = {re.compile(r'utm_[^&]+&?'),
re.compile(r'(wkey|wemail)[^&]+&?'),
diff --git a/searx/preferences.py b/searx/preferences.py
index 43d9ec0dd..b6a2ec4cc 100644
--- a/searx/preferences.py
+++ b/searx/preferences.py
@@ -23,7 +23,7 @@ class Setting(object):
def __init__(self, default_value, **kwargs):
super(Setting, self).__init__()
self.value = default_value
- for key, value in kwargs.iteritems():
+ for key, value in kwargs.items():
setattr(self, key, value)
self._post_init()
@@ -38,7 +38,7 @@ class Setting(object):
return self.value
def save(self, name, resp):
- resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
+ resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
class StringSetting(Setting):
@@ -133,7 +133,7 @@ class MapSetting(Setting):
def save(self, name, resp):
if hasattr(self, 'key'):
- resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
+ resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
class SwitchableSetting(Setting):
@@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
def _post_init(self):
super(EnginesSetting, self)._post_init()
transformed_choices = []
- for engine_name, engine in self.choices.iteritems():
+ for engine_name, engine in self.choices.items():
for category in engine.categories:
transformed_choice = dict()
transformed_choice['default_on'] = not engine.disabled
@@ -241,9 +241,9 @@ class Preferences(object):
'language': SearchLanguageSetting(settings['search']['language'],
choices=LANGUAGE_CODES),
'locale': EnumStringSetting(settings['ui']['default_locale'],
- choices=settings['locales'].keys() + ['']),
+ choices=list(settings['locales'].keys()) + ['']),
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
- choices=autocomplete.backends.keys() + ['']),
+ choices=list(autocomplete.backends.keys()) + ['']),
'image_proxy': MapSetting(settings['server']['image_proxy'],
map={'': settings['server']['image_proxy'],
'0': False,
@@ -260,7 +260,7 @@ class Preferences(object):
self.unknown_params = {}
def parse_cookies(self, input_data):
- for user_setting_name, user_setting in input_data.iteritems():
+ for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name == 'disabled_engines':
@@ -274,7 +274,7 @@ class Preferences(object):
disabled_engines = []
enabled_categories = []
disabled_plugins = []
- for user_setting_name, user_setting in input_data.iteritems():
+ for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name.startswith('engine_'):
@@ -295,7 +295,7 @@ class Preferences(object):
return self.key_value_settings[user_setting_name].get_value()
def save(self, resp):
- for user_setting_name, user_setting in self.key_value_settings.iteritems():
+ for user_setting_name, user_setting in self.key_value_settings.items():
user_setting.save(user_setting_name, resp)
self.engines.save(resp)
self.plugins.save(resp)
diff --git a/searx/query.py b/searx/query.py
index b8b1c0d2f..828a6fb30 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -21,8 +21,12 @@ from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
-import string
import re
+import string
+import sys
+
+if sys.version_info[0] == 3:
+ unicode = str
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@@ -146,7 +150,7 @@ class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
- self.query = query
+ self.query = query.encode('utf-8')
self.engines = engines
self.categories = categories
self.lang = lang
diff --git a/searx/results.py b/searx/results.py
index e262ec110..b6d408e29 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -1,9 +1,13 @@
import re
+import sys
from collections import defaultdict
from operator import itemgetter
from threading import RLock
-from urlparse import urlparse, unquote
from searx.engines import engines
+from searx.url_utils import urlparse, unquote
+
+if sys.version_info[0] == 3:
+ basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
diff --git a/searx/search.py b/searx/search.py
index 980cfeb99..790e7d071 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
import gc
+import sys
import threading
-from thread import start_new_thread
from time import time
from uuid import uuid4
import requests.exceptions
@@ -33,6 +33,14 @@ from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException
+try:
+ from thread import start_new_thread
+except:
+ from _thread import start_new_thread
+
+if sys.version_info[0] == 3:
+ unicode = str
+
logger = logger.getChild('search')
number_of_searches = 0
@@ -387,7 +395,7 @@ class Search(object):
request_params['time_range'] = search_query.time_range
# append request to list
- requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
+ requests.append((selected_engine['name'], search_query.query, request_params))
# update timeout_limit
timeout_limit = max(timeout_limit, engine.timeout)
diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml
index dbaf2fd52..59320480c 100644
--- a/searx/settings_robot.yml
+++ b/searx/settings_robot.yml
@@ -17,7 +17,7 @@ server:
ui:
themes_path : ""
- default_theme : legacy
+ default_theme : oscar
default_locale : ""
outgoing:
diff --git a/searx/templates/courgette/404.html b/searx/templates/courgette/404.html
index 77f1287ab..9e3b8ac29 100644
--- a/searx/templates/courgette/404.html
+++ b/searx/templates/courgette/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/legacy/404.html b/searx/templates/legacy/404.html
index 05c14e155..3e889dd21 100644
--- a/searx/templates/legacy/404.html
+++ b/searx/templates/legacy/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/oscar/404.html b/searx/templates/oscar/404.html
index 11d789564..5a50880a9 100644
--- a/searx/templates/oscar/404.html
+++ b/searx/templates/oscar/404.html
@@ -3,7 +3,7 @@
<div class="text-center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/pix-art/404.html b/searx/templates/pix-art/404.html
index 592e8610f..389bb5ec1 100644
--- a/searx/templates/pix-art/404.html
+++ b/searx/templates/pix-art/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/testing.py b/searx/testing.py
index 312e9f295..0d17b2a08 100644
--- a/searx/testing.py
+++ b/searx/testing.py
@@ -1,13 +1,16 @@
# -*- coding: utf-8 -*-
"""Shared testing code."""
-from plone.testing import Layer
-from unittest2 import TestCase
-from os.path import dirname, join, abspath
-
import os
import subprocess
+import traceback
+
+
+from os.path import dirname, join, abspath
+
+from splinter import Browser
+from unittest2 import TestCase
class SearxTestLayer:
@@ -32,7 +35,7 @@ class SearxTestLayer:
testTearDown = classmethod(testTearDown)
-class SearxRobotLayer(Layer):
+class SearxRobotLayer():
"""Searx Robot Test Layer"""
def setUp(self):
@@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
del os.environ['SEARX_SETTINGS_PATH']
-SEARXROBOTLAYER = SearxRobotLayer()
+# SEARXROBOTLAYER = SearxRobotLayer()
+def run_robot_tests(tests):
+ print('Running {0} tests'.format(len(tests)))
+ for test in tests:
+ with Browser() as browser:
+ test(browser)
class SearxTestCase(TestCase):
@@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
if __name__ == '__main__':
- from tests.test_robot import test_suite
import sys
- from zope.testrunner.runner import Runner
+ # test cases
+ from tests import robot
base_dir = abspath(join(dirname(__file__), '../tests'))
if sys.argv[1] == 'robot':
- r = Runner(['--color',
- '--auto-progress',
- '--stop-on-error',
- '--path',
- base_dir],
- found_suites=[test_suite()])
- r.run()
- sys.exit(int(r.failed))
+ test_layer = SearxRobotLayer()
+ errors = False
+ try:
+ test_layer.setUp()
+ run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
+ except Exception:
+ errors = True
+ print('Error occured: {0}'.format(traceback.format_exc()))
+ test_layer.tearDown()
+ sys.exit(1 if errors else 0)
diff --git a/searx/url_utils.py b/searx/url_utils.py
new file mode 100644
index 000000000..e9919ab30
--- /dev/null
+++ b/searx/url_utils.py
@@ -0,0 +1,28 @@
+from sys import version_info
+
+if version_info[0] == 2:
+ from urllib import quote, quote_plus, unquote, urlencode
+ from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
+else:
+ from urllib.parse import (
+ parse_qsl,
+ quote,
+ quote_plus,
+ unquote,
+ urlencode,
+ urljoin,
+ urlparse,
+ urlunparse,
+ ParseResult
+ )
+
+
+__export__ = (parse_qsl,
+ quote,
+ quote_plus,
+ unquote,
+ urlencode,
+ urljoin,
+ urlparse,
+ urlunparse,
+ ParseResult)
diff --git a/searx/utils.py b/searx/utils.py
index 35cb6f8a6..f24c57afa 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,11 +1,9 @@
-import cStringIO
import csv
import os
import re
from babel.dates import format_date
from codecs import getincrementalencoder
-from HTMLParser import HTMLParser
from imp import load_source
from os.path import splitext, join
from random import choice
@@ -16,6 +14,19 @@ from searx.languages import language_codes
from searx import settings
from searx import logger
+try:
+ from cStringIO import StringIO
+except:
+ from io import StringIO
+
+try:
+ from HTMLParser import HTMLParser
+except:
+ from html.parser import HTMLParser
+
+if sys.version_info[0] == 3:
+ unichr = chr
+ unicode = str
logger = logger.getChild('utils')
@@ -140,7 +151,7 @@ class UnicodeWriter:
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
- self.queue = cStringIO.StringIO()
+ self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = getincrementalencoder(encoding)()
@@ -152,14 +163,13 @@ class UnicodeWriter:
unicode_row.append(col.encode('utf-8').strip())
else:
unicode_row.append(col)
- self.writer.writerow(unicode_row)
+ self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
# Fetch UTF-8 output from the queue ...
- data = self.queue.getvalue()
- data = data.decode("utf-8")
+ data = self.queue.getvalue().strip('\x00')
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
- self.stream.write(data)
+ self.stream.write(data.decode('utf-8'))
# empty queue
self.queue.truncate(0)
@@ -231,7 +241,7 @@ def dict_subset(d, properties):
def prettify_url(url, max_length=74):
if len(url) > max_length:
- chunk_len = max_length / 2 + 1
+ chunk_len = int(max_length / 2 + 1)
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
else:
return url
diff --git a/searx/webapp.py b/searx/webapp.py
index 2aba4556d..03b572955 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -22,11 +22,12 @@ if __name__ == '__main__':
from os.path import realpath, dirname
path.append(realpath(dirname(realpath(__file__)) + '/../'))
-import cStringIO
import hashlib
import hmac
import json
import os
+import sys
+
import requests
from searx import logger
@@ -42,8 +43,6 @@ except:
exit(1)
from cgi import escape
from datetime import datetime, timedelta
-from urllib import urlencode
-from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix
from flask import (
Flask, request, render_template, url_for, Response, make_response,
@@ -52,7 +51,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify
from searx import settings, searx_dir, searx_debug
-from searx.exceptions import SearxException, SearxParameterException
+from searx.exceptions import SearxParameterException
from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
@@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
from searx.plugins import plugins
from searx.preferences import Preferences, ValidationException
from searx.answerers import answerers
+from searx.url_utils import urlencode, urlparse, urljoin
# check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298
@@ -78,6 +78,15 @@ except ImportError:
logger.critical("The pyopenssl package has to be installed.\n"
"Some HTTPS connections will fail")
+try:
+ from cStringIO import StringIO
+except:
+ from io import StringIO
+
+
+if sys.version_info[0] == 3:
+ unicode = str
+
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
+ kwargs['unicode'] = unicode
+
kwargs['scripts'] = set()
for plugin in request.user_plugins:
for script in plugin.js_dependencies:
@@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
def pre_request():
request.errors = []
- preferences = Preferences(themes, categories.keys(), engines, plugins)
+ preferences = Preferences(themes, list(categories.keys()), engines, plugins)
request.preferences = preferences
try:
preferences.parse_cookies(request.cookies)
@@ -479,10 +490,8 @@ def index():
for result in results:
if output_format == 'html':
if 'content' in result and result['content']:
- result['content'] = highlight_content(escape(result['content'][:1024]),
- search_query.query.encode('utf-8'))
- result['title'] = highlight_content(escape(result['title'] or u''),
- search_query.query.encode('utf-8'))
+ result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
+ result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
else:
if result.get('content'):
result['content'] = html_to_text(result['content']).strip()
@@ -510,7 +519,7 @@ def index():
result['publishedDate'] = format_date(result['publishedDate'])
if output_format == 'json':
- return Response(json.dumps({'query': search_query.query,
+ return Response(json.dumps({'query': search_query.query.decode('utf-8'),
'number_of_results': number_of_results,
'results': results,
'answers': list(result_container.answers),
@@ -519,7 +528,7 @@ def index():
'suggestions': list(result_container.suggestions)}),
mimetype='application/json')
elif output_format == 'csv':
- csv = UnicodeWriter(cStringIO.StringIO())
+ csv = UnicodeWriter(StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
csv.writerow(keys)
for row in results:
@@ -527,7 +536,7 @@ def index():
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
- cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
+ cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
response.headers.add('Content-Disposition', cont_disp)
return response
elif output_format == 'rss':
@@ -578,7 +587,7 @@ def autocompleter():
disabled_engines = request.preferences.engines.get_disabled()
# parse query
- raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
+ raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
raw_text_query.parse_query()
# check if search query is set
@@ -820,6 +829,7 @@ def page_not_found(e):
def run():
+ logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
app.run(
debug=searx_debug,
use_debugger=searx_debug,
diff --git a/tests/robot/__init__.py b/tests/robot/__init__.py
index e69de29bb..038a3196f 100644
--- a/tests/robot/__init__.py
+++ b/tests/robot/__init__.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+from time import sleep
+
+url = "http://localhost:11111/"
+
+
+def test_index(browser):
+ # Visit URL
+ browser.visit(url)
+ assert browser.is_text_present('about')
+
+
+def test_404(browser):
+ # Visit URL
+ browser.visit(url + 'missing_link')
+ assert browser.is_text_present('Page not found')
+
+
+def test_about(browser):
+ browser.visit(url)
+ browser.click_link_by_text('about')
+ assert browser.is_text_present('Why use searx?')
+
+
+def test_preferences(browser):
+ browser.visit(url)
+ browser.click_link_by_text('preferences')
+ assert browser.is_text_present('Preferences')
+ assert browser.is_text_present('Cookies')
+
+ assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
+
+
+def test_preferences_engine_select(browser):
+ browser.visit(url)
+ browser.click_link_by_text('preferences')
+
+ assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
+ browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+ assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+ browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
+ browser.find_by_xpath('//input[@value="save"]').first.click()
+
+ # waiting for the redirect - without this the test is flaky..
+ sleep(1)
+
+ browser.visit(url)
+ browser.click_link_by_text('preferences')
+ browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+ assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+
+
+def test_preferences_locale(browser):
+ browser.visit(url)
+ browser.click_link_by_text('preferences')
+
+ browser.select('locale', 'hu')
+ browser.find_by_xpath('//input[@value="save"]').first.click()
+
+ # waiting for the redirect - without this the test is flaky..
+ sleep(1)
+
+ browser.visit(url)
+ browser.click_link_by_text('beállítások')
+ browser.is_text_present('Beállítások')
+
+
+def test_search(browser):
+ browser.visit(url)
+ browser.fill('q', 'test search query')
+ browser.find_by_xpath('//button[@type="submit"]').first.click()
+ assert browser.is_text_present('didn\'t find any results')
diff --git a/tests/robot/test_basic.robot b/tests/robot/test_basic.robot
deleted file mode 100644
index d0074cd00..000000000
--- a/tests/robot/test_basic.robot
+++ /dev/null
@@ -1,153 +0,0 @@
-*** Settings ***
-Library Selenium2Library timeout=10 implicit_wait=0.5
-Test Setup Open Browser http://localhost:11111/
-Test Teardown Close All Browsers
-
-
-*** Keywords ***
-Submit Preferences
- Set Selenium Speed 2 seconds
- Submit Form id=search_form
- Location Should Be http://localhost:11111/
- Set Selenium Speed 0 seconds
-
-
-*** Test Cases ***
-Front page
- Page Should Contain about
- Page Should Contain preferences
-
-404 page
- Go To http://localhost:11111/no-such-page
- Page Should Contain Page not found
- Page Should Contain Go to search page
-
-About page
- Click Element link=about
- Page Should Contain Why use searx?
- Page Should Contain Element link=search engines
-
-Preferences page
- Click Element link=preferences
- Page Should Contain Preferences
- Page Should Contain Default categories
- Page Should Contain Currently used search engines
- Page Should Contain dummy dummy
- Page Should Contain general dummy
-
-Switch category
- Go To http://localhost:11111/preferences
- Page Should Contain Checkbox category_general
- Page Should Contain Checkbox category_dummy
- Click Element xpath=//*[.="general"]
- Click Element xpath=//*[.="dummy"]
- Submit Preferences
- Checkbox Should Not Be Selected category_general
- Checkbox Should Be Selected category_dummy
-
-Change language
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- Select From List locale hu
- Submit Preferences
- Page Should Contain rólunk
- Page Should Contain beállítások
-
-Change method
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- Select From List method GET
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be method GET
- Select From List method POST
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be method POST
-
-Change theme
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be theme legacy
- Select From List theme oscar
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be theme oscar
-
-Change safesearch
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be safesearch None
- Select From List safesearch Strict
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be safesearch Strict
-
-Change image proxy
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be image_proxy Disabled
- Select From List image_proxy Enabled
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be image_proxy Enabled
-
-Change search language
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be language Default language
- Select From List language Türkçe - tr-TR
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be language Türkçe - tr-TR
-
-Change autocomplete
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be autocomplete -
- Select From List autocomplete google
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be autocomplete google
-
-Change allowed/disabled engines
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- Page Should Contain Engine name
- Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
- Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block
- Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy']
- Submit Preferences
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- Page Should Contain Engine name
- Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
- Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \
-
-Block a plugin
- Page Should Contain about
- Page Should Contain preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be theme legacy
- Select From List theme oscar
- Submit Preferences
- Go To http://localhost:11111/preferences
- List Selection Should Be theme oscar
- Page Should Contain Plugins
- Click Link Plugins
- Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite
- Click Element xpath=//label[@for='plugin_HTTPS_rewrite']
- Submit Preferences
- Go To http://localhost:11111/preferences
- Page Should Contain Plugins
- Click Link Plugins
- Checkbox Should Be Selected id=plugin_HTTPS_rewrite
diff --git a/tests/unit/engines/test_archlinux.py b/tests/unit/engines/test_archlinux.py
index d0009d63a..e4ee0339c 100644
--- a/tests/unit/engines/test_archlinux.py
+++ b/tests/unit/engines/test_archlinux.py
@@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
self.assertTrue(query in params['url'])
self.assertTrue('wiki.archlinux.org' in params['url'])
- for lang, domain in domains.iteritems():
+ for lang, domain in domains.items():
dic['language'] = lang
params = archlinux.request(query, dic)
self.assertTrue(domain in params['url'])
@@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
for exp in expected:
res = results[i]
i += 1
- for key, value in exp.iteritems():
+ for key, value in exp.items():
self.assertEqual(res[key], value)
diff --git a/tests/unit/engines/test_bing.py b/tests/unit/engines/test_bing.py
index a63b2e333..523ec57b8 100644
--- a/tests/unit/engines/test_bing.py
+++ b/tests/unit/engines/test_bing.py
@@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
class TestBingEngine(SearxTestCase):
def test_request(self):
- query = 'test_query'
+ query = u'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
- params = bing.request(query, dicto)
+ params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
self.assertTrue('language%3AFR' in params['url'])
self.assertTrue('bing.com' in params['url'])
dicto['language'] = 'all'
- params = bing.request(query, dicto)
+ params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('language' in params['url'])
def test_response(self):
diff --git a/tests/unit/engines/test_bing_news.py b/tests/unit/engines/test_bing_news.py
index b6793f7be..e571adcee 100644
--- a/tests/unit/engines/test_bing_news.py
+++ b/tests/unit/engines/test_bing_news.py
@@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
self.assertRaises(AttributeError, bing_news.response, '')
self.assertRaises(AttributeError, bing_news.response, '[]')
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), [])
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), [])
html = """<?xml version="1.0" encoding="utf-8" ?>
@@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
</item>
</channel>
</rss>""" # noqa
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
</item>
</channel>
</rss>""" # noqa
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
</channel>
</rss>""" # noqa
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
diff --git a/tests/unit/engines/test_btdigg.py b/tests/unit/engines/test_btdigg.py
index 2721f4e7c..6a88e3f75 100644
--- a/tests/unit/engines/test_btdigg.py
+++ b/tests/unit/engines/test_btdigg.py
@@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
self.assertRaises(AttributeError, btdigg.response, '')
self.assertRaises(AttributeError, btdigg.response, '[]')
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text='<html></html>')
self.assertEqual(btdigg.response(response), [])
- html = """
+ html = u"""
<div id="search_res">
<table>
<tr>
@@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
- html = """
+ html = u"""
<div id="search_res">
<table>
<tr>
@@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 5)
diff --git a/tests/unit/engines/test_currency_convert.py b/tests/unit/engines/test_currency_convert.py
index b7720569f..2814d791d 100644
--- a/tests/unit/engines/test_currency_convert.py
+++ b/tests/unit/engines/test_currency_convert.py
@@ -8,13 +8,13 @@ from searx.testing import SearxTestCase
class TestCurrencyConvertEngine(SearxTestCase):
def test_request(self):
- query = 'test_query'
+ query = b'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 1
params = currency_convert.request(query, dicto)
self.assertNotIn('url', params)
- query = 'convert 10 Pound Sterlings to United States Dollars'
+ query = b'convert 10 Pound Sterlings to United States Dollars'
params = currency_convert.request(query, dicto)
self.assertIn('url', params)
self.assertIn('finance.yahoo.com', params['url'])
diff --git a/tests/unit/engines/test_digbt.py b/tests/unit/engines/test_digbt.py
index 31a1b03a4..31c2ecabb 100644
--- a/tests/unit/engines/test_digbt.py
+++ b/tests/unit/engines/test_digbt.py
@@ -21,7 +21,7 @@ class TestDigBTEngine(SearxTestCase):
self.assertRaises(AttributeError, digbt.response, '')
self.assertRaises(AttributeError, digbt.response, '[]')
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text='<html></html>')
self.assertEqual(digbt.response(response), [])
html = """
@@ -50,7 +50,7 @@ class TestDigBTEngine(SearxTestCase):
</td></tr>
</table>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = digbt.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
diff --git a/tests/unit/engines/test_duckduckgo.py b/tests/unit/engines/test_duckduckgo.py
index 7d6abad22..8502a2e51 100644
--- a/tests/unit/engines/test_duckduckgo.py
+++ b/tests/unit/engines/test_duckduckgo.py
@@ -90,8 +90,7 @@ class TestDuckduckgoEngine(SearxTestCase):
"wt-wt":"All Results","ar-es":"Argentina","au-en":"Australia","at-de":"Austria","be-fr":"Belgium (fr)"
}some more code..."""
response = mock.Mock(text=js)
- languages = duckduckgo._fetch_supported_languages(response)
- self.assertEqual(type(languages), list)
+ languages = list(duckduckgo._fetch_supported_languages(response))
self.assertEqual(len(languages), 5)
self.assertIn('wt-WT', languages)
self.assertIn('es-AR', languages)
diff --git a/tests/unit/engines/test_frinkiac.py b/tests/unit/engines/test_frinkiac.py
index f3eb021d2..5ea220cd3 100644
--- a/tests/unit/engines/test_frinkiac.py
+++ b/tests/unit/engines/test_frinkiac.py
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
-from json import dumps
from searx.engines import frinkiac
from searx.testing import SearxTestCase
@@ -44,6 +43,8 @@ class TestFrinkiacEngine(SearxTestCase):
self.assertEqual(type(results), list)
self.assertEqual(len(results), 4)
self.assertEqual(results[0]['title'], u'S06E18')
- self.assertEqual(results[0]['url'], 'https://frinkiac.com/?p=caption&e=S06E18&t=534616')
+ self.assertIn('p=caption', results[0]['url'])
+ self.assertIn('e=S06E18', results[0]['url'])
+ self.assertIn('t=534616', results[0]['url'])
self.assertEqual(results[0]['thumbnail_src'], 'https://frinkiac.com/img/S06E18/534616/medium.jpg')
self.assertEqual(results[0]['img_src'], 'https://frinkiac.com/img/S06E18/534616.jpg')
diff --git a/tests/unit/engines/test_gigablast.py b/tests/unit/engines/test_gigablast.py
index 127b974b5..6b2d26458 100644
--- a/tests/unit/engines/test_gigablast.py
+++ b/tests/unit/engines/test_gigablast.py
@@ -10,6 +10,7 @@ class TestGigablastEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
+ dicto['safesearch'] = 0
dicto['language'] = 'all'
params = gigablast.request(query, dicto)
self.assertTrue('url' in params)
diff --git a/tests/unit/engines/test_soundcloud.py b/tests/unit/engines/test_soundcloud.py
index 85495dc57..3077d3b4b 100644
--- a/tests/unit/engines/test_soundcloud.py
+++ b/tests/unit/engines/test_soundcloud.py
@@ -2,7 +2,7 @@ from collections import defaultdict
import mock
from searx.engines import soundcloud
from searx.testing import SearxTestCase
-from urllib import quote_plus
+from searx.url_utils import quote_plus
class TestSoundcloudEngine(SearxTestCase):
diff --git a/tests/unit/engines/test_startpage.py b/tests/unit/engines/test_startpage.py
index 9a1a09bc7..a7a97785e 100644
--- a/tests/unit/engines/test_startpage.py
+++ b/tests/unit/engines/test_startpage.py
@@ -31,7 +31,7 @@ class TestStartpageEngine(SearxTestCase):
self.assertRaises(AttributeError, startpage.response, '')
self.assertRaises(AttributeError, startpage.response, '[]')
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text='<html></html>')
self.assertEqual(startpage.response(response), [])
html = """
@@ -62,7 +62,7 @@ class TestStartpageEngine(SearxTestCase):
</p>
</div>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = startpage.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@@ -133,7 +133,7 @@ class TestStartpageEngine(SearxTestCase):
</p>
</div>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html.encode('utf-8'))
results = startpage.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
diff --git a/tests/unit/engines/test_swisscows.py b/tests/unit/engines/test_swisscows.py
index 27f33d70a..53890be78 100644
--- a/tests/unit/engines/test_swisscows.py
+++ b/tests/unit/engines/test_swisscows.py
@@ -33,13 +33,13 @@ class TestSwisscowsEngine(SearxTestCase):
self.assertRaises(AttributeError, swisscows.response, '')
self.assertRaises(AttributeError, swisscows.response, '[]')
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text=b'<html></html>')
self.assertEqual(swisscows.response(response), [])
- response = mock.Mock(content='<html></html>')
+ response = mock.Mock(text=b'<html></html>')
self.assertEqual(swisscows.response(response), [])
- html = u"""
+ html = b"""
<script>
App.Dispatcher.dispatch("initialize", {
html5history: true,
@@ -111,7 +111,7 @@ class TestSwisscowsEngine(SearxTestCase):
});
</script>
"""
- response = mock.Mock(content=html)
+ response = mock.Mock(text=html)
results = swisscows.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 3)
diff --git a/tests/unit/engines/test_tokyotoshokan.py b/tests/unit/engines/test_tokyotoshokan.py
index efe7dbfc2..b5c6fad17 100644
--- a/tests/unit/engines/test_tokyotoshokan.py
+++ b/tests/unit/engines/test_tokyotoshokan.py
@@ -91,7 +91,7 @@ class TestTokyotoshokanEngine(SearxTestCase):
self.assertEqual(r['title'], 'Koyomimonogatari')
self.assertEqual(r['magnetlink'], 'magnet:?xt=urn:btih:4c19eb46b5113685fbd2288ed2531b0b')
self.assertEqual(r['filesize'], int(1024 * 1024 * 10.5))
- self.assertEqual(r['publishedDate'], datetime(2016, 03, 26, 16, 41))
+ self.assertEqual(r['publishedDate'], datetime(2016, 3, 26, 16, 41))
self.assertEqual(r['content'], 'Comment: sample comment')
self.assertEqual(r['seed'], 53)
self.assertEqual(r['leech'], 18)
diff --git a/tests/unit/engines/test_wikidata.py b/tests/unit/engines/test_wikidata.py
index ec5f52ef9..aa69f116e 100644
--- a/tests/unit/engines/test_wikidata.py
+++ b/tests/unit/engines/test_wikidata.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-from json import loads
from lxml.html import fromstring
from collections import defaultdict
import mock
@@ -31,7 +30,7 @@ class TestWikidataEngine(SearxTestCase):
self.assertRaises(AttributeError, wikidata.response, '')
self.assertRaises(AttributeError, wikidata.response, '[]')
- response = mock.Mock(content='<html></html>', search_params={"language": "all"})
+ response = mock.Mock(text='<html></html>', search_params={"language": "all"})
self.assertEqual(wikidata.response(response), [])
def test_getDetail(self):
diff --git a/tests/unit/engines/test_wikipedia.py b/tests/unit/engines/test_wikipedia.py
index 988080b6a..7a86514c7 100644
--- a/tests/unit/engines/test_wikipedia.py
+++ b/tests/unit/engines/test_wikipedia.py
@@ -13,15 +13,15 @@ class TestWikipediaEngine(SearxTestCase):
query = 'test_query'
dicto = defaultdict(dict)
dicto['language'] = 'fr-FR'
- params = wikipedia.request(query, dicto)
+ params = wikipedia.request(query.encode('utf-8'), dicto)
self.assertIn('url', params)
self.assertIn(query, params['url'])
self.assertIn('test_query', params['url'])
self.assertIn('Test_Query', params['url'])
self.assertIn('fr.wikipedia.org', params['url'])
- query = 'Test_Query'
- params = wikipedia.request(query, dicto)
+ query = u'Test_Query'
+ params = wikipedia.request(query.encode('utf-8'), dicto)
self.assertIn('Test_Query', params['url'])
self.assertNotIn('test_query', params['url'])
@@ -57,7 +57,7 @@ class TestWikipediaEngine(SearxTestCase):
}
}
}"""
- response = mock.Mock(content=json, search_params=dicto)
+ response = mock.Mock(text=json, search_params=dicto)
self.assertEqual(wikipedia.response(response), [])
# normal case
@@ -80,7 +80,7 @@ class TestWikipediaEngine(SearxTestCase):
}
}
}"""
- response = mock.Mock(content=json, search_params=dicto)
+ response = mock.Mock(text=json, search_params=dicto)
results = wikipedia.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@@ -108,10 +108,10 @@ class TestWikipediaEngine(SearxTestCase):
}
}
}"""
- response = mock.Mock(content=json, search_params=dicto)
+ response = mock.Mock(text=json, search_params=dicto)
results = wikipedia.response(response)
self.assertEqual(type(results), list)
- self.assertEqual(len(results), 0)
+ self.assertEqual(len(results), 2)
# no image
json = """
@@ -130,7 +130,7 @@ class TestWikipediaEngine(SearxTestCase):
}
}
}"""
- response = mock.Mock(content=json, search_params=dicto)
+ response = mock.Mock(text=json, search_params=dicto)
results = wikipedia.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@@ -158,7 +158,7 @@ class TestWikipediaEngine(SearxTestCase):
}
}
}"""
- response = mock.Mock(content=json, search_params=dicto)
+ response = mock.Mock(text=json, search_params=dicto)
results = wikipedia.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
diff --git a/tests/unit/engines/test_wolframalpha_api.py b/tests/unit/engines/test_wolframalpha_api.py
index 64a64ceb3..30d337645 100644
--- a/tests/unit/engines/test_wolframalpha_api.py
+++ b/tests/unit/engines/test_wolframalpha_api.py
@@ -35,11 +35,11 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
xml = '''<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='false' error='false' />
'''
- response = mock.Mock(content=xml)
+ response = mock.Mock(text=xml.encode('utf-8'))
self.assertEqual(wolframalpha_api.response(response), [])
# test basic case
- xml = """<?xml version='1.0' encoding='UTF-8'?>
+ xml = b"""<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='true'
error='false'
numpods='3'
@@ -83,7 +83,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
</pod>
</queryresult>
"""
- response = mock.Mock(content=xml, request=request)
+ response = mock.Mock(text=xml, request=request)
results = wolframalpha_api.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@@ -107,7 +107,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
self.assertIn('result_plaintext', results[1]['content'])
# test calc
- xml = """<?xml version='1.0' encoding='UTF-8'?>
+ xml = b"""<?xml version='1.0' encoding='UTF-8'?>
<queryresult success='true'
error='false'
numpods='2'
@@ -144,7 +144,7 @@ class TestWolframAlphaAPIEngine(SearxTestCase):
</pod>
</queryresult>
"""
- response = mock.Mock(content=xml, request=request)
+ response = mock.Mock(text=xml, request=request)
results = wolframalpha_api.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py
index 78dcea478..e497371f8 100644
--- a/tests/unit/test_plugins.py
+++ b/tests/unit/test_plugins.py
@@ -48,11 +48,11 @@ class SelfIPTest(SearxTestCase):
# IP test
request = Mock(remote_addr='127.0.0.1')
request.headers.getlist.return_value = []
- search = get_search_mock(query='ip', pageno=1)
+ search = get_search_mock(query=b'ip', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('127.0.0.1' in search.result_container.answers)
- search = get_search_mock(query='ip', pageno=2)
+ search = get_search_mock(query=b'ip', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('127.0.0.1' in search.result_container.answers)
@@ -60,26 +60,26 @@ class SelfIPTest(SearxTestCase):
request = Mock(user_agent='Mock')
request.headers.getlist.return_value = []
- search = get_search_mock(query='user-agent', pageno=1)
+ search = get_search_mock(query=b'user-agent', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers)
- search = get_search_mock(query='user-agent', pageno=2)
+ search = get_search_mock(query=b'user-agent', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('Mock' in search.result_container.answers)
- search = get_search_mock(query='user-agent', pageno=1)
+ search = get_search_mock(query=b'user-agent', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers)
- search = get_search_mock(query='user-agent', pageno=2)
+ search = get_search_mock(query=b'user-agent', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('Mock' in search.result_container.answers)
- search = get_search_mock(query='What is my User-Agent?', pageno=1)
+ search = get_search_mock(query=b'What is my User-Agent?', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers)
- search = get_search_mock(query='What is my User-Agent?', pageno=2)
+ search = get_search_mock(query=b'What is my User-Agent?', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('Mock' in search.result_container.answers)
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index 04480791d..eb40e62e2 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -1,8 +1,12 @@
# -*- coding: utf-8 -*-
import mock
+import sys
from searx.testing import SearxTestCase
from searx import utils
+if sys.version_info[0] == 3:
+ unicode = str
+
class TestUtils(SearxTestCase):
@@ -30,9 +34,9 @@ class TestUtils(SearxTestCase):
self.assertEqual(utils.highlight_content(content, None), content)
content = 'a'
- query = 'test'
+ query = b'test'
self.assertEqual(utils.highlight_content(content, query), content)
- query = 'a test'
+ query = b'a test'
self.assertEqual(utils.highlight_content(content, query), content)
def test_html_to_text(self):
diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py
index 5e5f0b4bf..45a08c1ba 100644
--- a/tests/unit/test_webapp.py
+++ b/tests/unit/test_webapp.py
@@ -2,10 +2,10 @@
import json
from mock import Mock
-from urlparse import ParseResult
from searx import webapp
from searx.testing import SearxTestCase
from searx.search import Search
+from searx.url_utils import ParseResult
class ViewsTestCase(SearxTestCase):
@@ -57,37 +57,35 @@ class ViewsTestCase(SearxTestCase):
def test_index_empty(self):
result = self.app.post('/')
self.assertEqual(result.status_code, 200)
- self.assertIn('<div class="title"><h1>searx</h1></div>', result.data)
+ self.assertIn(b'<div class="title"><h1>searx</h1></div>', result.data)
def test_index_html(self):
result = self.app.post('/', data={'q': 'test'})
self.assertIn(
- '<h3 class="result_title"><img width="14" height="14" class="favicon" src="/static/themes/legacy/img/icons/icon_youtube.ico" alt="youtube" /><a href="http://second.test.xyz" rel="noreferrer">Second <span class="highlight">Test</span></a></h3>', # noqa
+ b'<h3 class="result_title"><img width="14" height="14" class="favicon" src="/static/themes/legacy/img/icons/icon_youtube.ico" alt="youtube" /><a href="http://second.test.xyz" rel="noreferrer">Second <span class="highlight">Test</span></a></h3>', # noqa
result.data
)
self.assertIn(
- '<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>', # noqa
+ b'<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>', # noqa
result.data
)
def test_index_json(self):
result = self.app.post('/', data={'q': 'test', 'format': 'json'})
- result_dict = json.loads(result.data)
+ result_dict = json.loads(result.data.decode('utf-8'))
self.assertEqual('test', result_dict['query'])
- self.assertEqual(
- result_dict['results'][0]['content'], 'first test content')
- self.assertEqual(
- result_dict['results'][0]['url'], 'http://first.test.xyz')
+ self.assertEqual(result_dict['results'][0]['content'], 'first test content')
+ self.assertEqual(result_dict['results'][0]['url'], 'http://first.test.xyz')
def test_index_csv(self):
result = self.app.post('/', data={'q': 'test', 'format': 'csv'})
self.assertEqual(
- 'title,url,content,host,engine,score\r\n'
- 'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,\r\n' # noqa
- 'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,\r\n', # noqa
+ b'title,url,content,host,engine,score\r\n'
+ b'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,\r\n' # noqa
+ b'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,\r\n', # noqa
result.data
)
@@ -95,65 +93,65 @@ class ViewsTestCase(SearxTestCase):
result = self.app.post('/', data={'q': 'test', 'format': 'rss'})
self.assertIn(
- '<description>Search results for "test" - searx</description>',
+ b'<description>Search results for "test" - searx</description>',
result.data
)
self.assertIn(
- '<opensearch:totalResults>3</opensearch:totalResults>',
+ b'<opensearch:totalResults>3</opensearch:totalResults>',
result.data
)
self.assertIn(
- '<title>First Test</title>',
+ b'<title>First Test</title>',
result.data
)
self.assertIn(
- '<link>http://first.test.xyz</link>',
+ b'<link>http://first.test.xyz</link>',
result.data
)
self.assertIn(
- '<description>first test content</description>',
+ b'<description>first test content</description>',
result.data
)
def test_about(self):
result = self.app.get('/about')
self.assertEqual(result.status_code, 200)
- self.assertIn('<h1>About <a href="/">searx</a></h1>', result.data)
+ self.assertIn(b'<h1>About <a href="/">searx</a></h1>', result.data)
def test_preferences(self):
result = self.app.get('/preferences')
self.assertEqual(result.status_code, 200)
self.assertIn(
- '<form method="post" action="/preferences" id="search_form">',
+ b'<form method="post" action="/preferences" id="search_form">',
result.data
)
self.assertIn(
- '<legend>Default categories</legend>',
+ b'<legend>Default categories</legend>',
result.data
)
self.assertIn(
- '<legend>Interface language</legend>',
+ b'<legend>Interface language</legend>',
result.data
)
def test_stats(self):
result = self.app.get('/stats')
self.assertEqual(result.status_code, 200)
- self.assertIn('<h2>Engine stats</h2>', result.data)
+ self.assertIn(b'<h2>Engine stats</h2>', result.data)
def test_robots_txt(self):
result = self.app.get('/robots.txt')
self.assertEqual(result.status_code, 200)
- self.assertIn('Allow: /', result.data)
+ self.assertIn(b'Allow: /', result.data)
def test_opensearch_xml(self):
result = self.app.get('/opensearch.xml')
self.assertEqual(result.status_code, 200)
- self.assertIn('<Description>a privacy-respecting, hackable metasearch engine</Description>', result.data)
+ self.assertIn(b'<Description>a privacy-respecting, hackable metasearch engine</Description>', result.data)
def test_favicon(self):
result = self.app.get('/favicon.ico')