author     Noémi Ványi <kvch@users.noreply.github.com>  2020-09-12 14:51:35 +0200
committer  GitHub <noreply@github.com>  2020-09-12 14:51:35 +0200
commit     2370234d0978f59dd62efa4a4931e41ad31444d1 (patch)
tree       d3863e22b3d34092484146ce0bdc6e0ca8d36216
parent     272158944bf13503e2597018fc60a00baddec660 (diff)
parent     bdac99d4f0349a71d7ecb9a4c61687356afedd6b (diff)
download   searxng-2370234d0978f59dd62efa4a4931e41ad31444d1.tar.gz
           searxng-2370234d0978f59dd62efa4a4931e41ad31444d1.zip
Merge pull request #2137 from dalf/drop-python-2
Drop Python 2
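The change repeated across the ~120 files below: delete the sys.version_info branches and the searx.url_utils wrapper, import from urllib.parse directly, and treat queries as str instead of bytes. A minimal before/after sketch of that pattern (illustrative toy query, not code from the repo):

    # Before, the py2/py3 shim removed throughout this commit looked like:
    #     import sys
    #     if sys.version_info[0] == 3:
    #         unicode = str
    #     from searx.url_utils import urlencode
    # After, py3 only:
    from urllib.parse import urlencode

    query = 'drop python 2'          # engine queries are str now, no .decode('utf-8')
    print(urlencode({'q': query}))   # -> q=drop+python+2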
-rw-r--r--  Makefile | 5
-rwxr-xr-x  manage.sh | 4
-rw-r--r--  searx/__init__.py | 8
-rw-r--r--  searx/answerers/__init__.py | 8
-rw-r--r--  searx/answerers/random/answerer.py | 30
-rw-r--r--  searx/answerers/statistics/answerer.py | 15
-rw-r--r--  searx/autocomplete.py | 17
-rw-r--r--  searx/engines/1337x.py | 3
-rw-r--r--  searx/engines/acgsou.py | 4
-rw-r--r--  searx/engines/apkmirror.py | 3
-rw-r--r--  searx/engines/archlinux.py | 4
-rw-r--r--  searx/engines/arxiv.py | 4
-rwxr-xr-x  searx/engines/base.py | 4
-rw-r--r--  searx/engines/bing.py | 4
-rw-r--r--  searx/engines/bing_images.py | 4
-rw-r--r--  searx/engines/bing_news.py | 3
-rw-r--r--  searx/engines/bing_videos.py | 2
-rw-r--r--  searx/engines/btdigg.py | 2
-rw-r--r--  searx/engines/currency_convert.py | 7
-rw-r--r--  searx/engines/dailymotion.py | 2
-rw-r--r--  searx/engines/deezer.py | 4
-rw-r--r--  searx/engines/deviantart.py | 3
-rw-r--r--  searx/engines/dictzone.py | 8
-rw-r--r--  searx/engines/digbt.py | 5
-rw-r--r--  searx/engines/digg.py | 2
-rw-r--r--  searx/engines/doku.py | 2
-rw-r--r--  searx/engines/duckduckgo.py | 2
-rw-r--r--  searx/engines/duckduckgo_definitions.py | 2
-rw-r--r--  searx/engines/duckduckgo_images.py | 2
-rw-r--r--  searx/engines/duden.py | 2
-rw-r--r--  searx/engines/etools.py | 2
-rw-r--r--  searx/engines/fdroid.py | 2
-rw-r--r--  searx/engines/filecrop.py | 7
-rw-r--r--  searx/engines/flickr.py | 2
-rw-r--r--  searx/engines/flickr_noapi.py | 10
-rw-r--r--  searx/engines/framalibre.py | 7
-rw-r--r--  searx/engines/frinkiac.py | 2
-rw-r--r--  searx/engines/genius.py | 2
-rw-r--r--  searx/engines/gentoo.py | 4
-rw-r--r--  searx/engines/gigablast.py | 2
-rw-r--r--  searx/engines/github.py | 2
-rw-r--r--  searx/engines/google.py | 2
-rw-r--r--  searx/engines/google_images.py | 5
-rw-r--r--  searx/engines/google_news.py | 2
-rw-r--r--  searx/engines/google_videos.py | 2
-rw-r--r--  searx/engines/ina.py | 7
-rw-r--r--  searx/engines/invidious.py | 2
-rw-r--r--  searx/engines/json_engine.py | 7
-rw-r--r--  searx/engines/kickass.py | 2
-rw-r--r--  searx/engines/mediawiki.py | 4
-rw-r--r--  searx/engines/microsoft_academic.py | 3
-rw-r--r--  searx/engines/mixcloud.py | 2
-rw-r--r--  searx/engines/nyaa.py | 2
-rw-r--r--  searx/engines/openstreetmap.py | 8
-rw-r--r--  searx/engines/peertube.py | 2
-rw-r--r--  searx/engines/photon.py | 2
-rw-r--r--  searx/engines/piratebay.py | 8
-rw-r--r--  searx/engines/pubmed.py | 2
-rw-r--r--  searx/engines/qwant.py | 6
-rw-r--r--  searx/engines/reddit.py | 2
-rw-r--r--  searx/engines/scanr_structures.py | 4
-rw-r--r--  searx/engines/searchcode_code.py | 2
-rw-r--r--  searx/engines/searchcode_doc.py | 2
-rw-r--r--  searx/engines/seedpeer.py | 2
-rw-r--r--  searx/engines/soundcloud.py | 9
-rw-r--r--  searx/engines/spotify.py | 8
-rw-r--r--  searx/engines/stackoverflow.py | 2
-rw-r--r--  searx/engines/tokyotoshokan.py | 2
-rw-r--r--  searx/engines/torrentz.py | 2
-rw-r--r--  searx/engines/translated.py | 10
-rw-r--r--  searx/engines/twitter.py | 2
-rw-r--r--  searx/engines/unsplash.py | 2
-rw-r--r--  searx/engines/vimeo.py | 2
-rw-r--r--  searx/engines/wikidata.py | 14
-rw-r--r--  searx/engines/wikipedia.py | 4
-rw-r--r--  searx/engines/wolframalpha_api.py | 20
-rw-r--r--  searx/engines/wolframalpha_noapi.py | 2
-rw-r--r--  searx/engines/www1x.py | 2
-rw-r--r--  searx/engines/xpath.py | 6
-rw-r--r--  searx/engines/yacy.py | 2
-rw-r--r--  searx/engines/yahoo.py | 2
-rw-r--r--  searx/engines/yahoo_news.py | 4
-rw-r--r--  searx/engines/yandex.py | 2
-rw-r--r--  searx/engines/yggtorrent.py | 2
-rw-r--r--  searx/engines/youtube_api.py | 2
-rw-r--r--  searx/engines/youtube_noapi.py | 2
-rw-r--r--  searx/exceptions.py | 2
-rw-r--r--  searx/external_bang.py | 2
-rw-r--r--  searx/languages.py | 138
-rw-r--r--  searx/plugins/__init__.py | 7
-rw-r--r--  searx/plugins/https_rewrite.py | 5
-rw-r--r--  searx/plugins/oa_doi_rewrite.py | 2
-rw-r--r--  searx/plugins/self_info.py | 4
-rw-r--r--  searx/plugins/tracker_url_remover.py | 2
-rw-r--r--  searx/poolrequests.py | 6
-rw-r--r--  searx/preferences.py | 17
-rw-r--r--  searx/query.py | 21
-rw-r--r--  searx/results.py | 17
-rw-r--r--  searx/search.py | 23
-rw-r--r--  searx/templates/courgette/404.html | 2
-rw-r--r--  searx/templates/legacy/404.html | 2
-rw-r--r--  searx/templates/oscar/404.html | 2
-rw-r--r--  searx/templates/simple/404.html | 2
-rw-r--r--  searx/testing.py | 4
-rw-r--r--  searx/url_utils.py | 30
-rw-r--r--  searx/utils.py | 78
-rwxr-xr-x  searx/webapp.py | 73
-rw-r--r--  tests/unit/test_answerers.py | 4
-rw-r--r--  tests/unit/test_plugins.py | 16
-rw-r--r--  tests/unit/test_preferences.py | 32
-rw-r--r--  tests/unit/test_query.py | 52
-rw-r--r--  tests/unit/test_search.py | 16
-rw-r--r--  tests/unit/test_utils.py | 20
-rw-r--r--  tests/unit/test_webapp.py | 4
-rw-r--r--  utils/fabfile.py | 2
-rw-r--r--  utils/fetch_currencies.py | 20
-rwxr-xr-x  utils/fetch_firefox_version.py | 2
-rw-r--r--  utils/fetch_languages.py | 13
-rw-r--r--  utils/makefile.python | 10
-rwxr-xr-x  utils/standalone_searx.py | 3
120 files changed, 438 insertions, 585 deletions
diff --git a/Makefile b/Makefile
index 2f10c62bc..636ed275b 100644
--- a/Makefile
+++ b/Makefile
@@ -213,10 +213,6 @@ gecko.driver:
PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
-ifeq ($(PY),2)
-test.pylint:
- @echo "LINT skip liniting py2"
-else
# TODO: balance linting with pylint
test.pylint: pyenvinstall
@@ -225,7 +221,6 @@ test.pylint: pyenvinstall
searx/testing.py \
searx/engines/gigablast.py \
)
-endif
# ignored rules:
# E402 module level import not at top of file
diff --git a/manage.sh b/manage.sh
index b3c57bf88..78571e45b 100755
--- a/manage.sh
+++ b/manage.sh
@@ -39,7 +39,7 @@ install_geckodriver() {
return
fi
GECKODRIVER_VERSION="v0.24.0"
- PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
+ PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
case "$PLATFORM" in
"linux 32bit" | "linux2 32bit") ARCH="linux32";;
"linux 64bit" | "linux2 64bit") ARCH="linux64";;
@@ -136,7 +136,7 @@ docker_build() {
# Check consistency between the git tag and the searx/version.py file
# /!\ HACK : parse Python file with bash /!\
# otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py )
- # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
+ # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -)
if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then
echo "Inconsistency between the last git tag and the searx/version.py file"
diff --git a/searx/__init__.py b/searx/__init__.py
index 1ba03ad63..80a7ffc76 100644
--- a/searx/__init__.py
+++ b/searx/__init__.py
@@ -21,12 +21,8 @@ from os import environ
from os.path import realpath, dirname, join, abspath, isfile
from io import open
from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
-try:
- from yaml import safe_load
-except:
- from sys import exit, stderr
- stderr.write('[E] install pyyaml\n')
- exit(2)
+from yaml import safe_load
+
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py
index 444316f11..97e7e5854 100644
--- a/searx/answerers/__init__.py
+++ b/searx/answerers/__init__.py
@@ -1,12 +1,8 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
-from sys import version_info
from searx.utils import load_module
from collections import defaultdict
-if version_info[0] == 3:
- unicode = str
-
answerers_dir = dirname(realpath(__file__))
@@ -36,10 +32,10 @@ def ask(query):
results = []
query_parts = list(filter(None, query.query.split()))
- if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+ if query_parts[0] not in answerers_by_keywords:
return results
- for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+ for answerer in answerers_by_keywords[query_parts[0]]:
result = answerer(query)
if result:
results.append(result)
diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py
index 4aafa2cfd..d5223e517 100644
--- a/searx/answerers/random/answerer.py
+++ b/searx/answerers/random/answerer.py
@@ -1,7 +1,6 @@
import hashlib
import random
import string
-import sys
import uuid
from flask_babel import gettext
@@ -10,12 +9,7 @@ from flask_babel import gettext
keywords = ('random',)
random_int_max = 2**31
-
-if sys.version_info[0] == 2:
- random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
- unicode = str
- random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_characters():
@@ -24,32 +18,32 @@ def random_characters():
def random_string():
- return u''.join(random_characters())
+ return ''.join(random_characters())
def random_float():
- return unicode(random.random())
+ return str(random.random())
def random_int():
- return unicode(random.randint(-random_int_max, random_int_max))
+ return str(random.randint(-random_int_max, random_int_max))
def random_sha256():
m = hashlib.sha256()
m.update(''.join(random_characters()).encode())
- return unicode(m.hexdigest())
+ return str(m.hexdigest())
def random_uuid():
- return unicode(uuid.uuid4())
+ return str(uuid.uuid4())
-random_types = {b'string': random_string,
- b'int': random_int,
- b'float': random_float,
- b'sha256': random_sha256,
- b'uuid': random_uuid}
+random_types = {'string': random_string,
+ 'int': random_int,
+ 'float': random_float,
+ 'sha256': random_sha256,
+ 'uuid': random_uuid}
# required answerer function
@@ -70,4 +64,4 @@ def answer(query):
def self_info():
return {'name': gettext('Random value generator'),
'description': gettext('Generate different random values'),
- 'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]}
+ 'examples': ['random {}'.format(x) for x in random_types]}
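With Python 3 only, the raw query splits into str tokens, so the bytes keys (b'string', ...) in random_types become plain strings and the .decode('utf-8') calls disappear. A trimmed sketch of the dispatch, reusing the names from this hunk (illustrative, not the full answerer):

    import random
    import string
    import uuid

    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase

    random_types = {'string': lambda: ''.join(random.choice(random_string_letters)
                                              for _ in range(16)),
                    'uuid': lambda: str(uuid.uuid4())}

    parts = 'random uuid'.split()        # str tokens, keys match without decoding
    if len(parts) == 2 and parts[1] in random_types:
        print(random_types[parts[1]]())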
diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py
index 73dd25cfd..abd4be7f5 100644
--- a/searx/answerers/statistics/answerer.py
+++ b/searx/answerers/statistics/answerer.py
@@ -1,11 +1,8 @@
-from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
-if version_info[0] == 3:
- unicode = str
keywords = ('min',
'max',
@@ -30,21 +27,21 @@ def answer(query):
func = parts[0]
answer = None
- if func == b'min':
+ if func == 'min':
answer = min(args)
- elif func == b'max':
+ elif func == 'max':
answer = max(args)
- elif func == b'avg':
+ elif func == 'avg':
answer = sum(args) / len(args)
- elif func == b'sum':
+ elif func == 'sum':
answer = sum(args)
- elif func == b'prod':
+ elif func == 'prod':
answer = reduce(mul, args, 1)
if answer is None:
return []
- return [{'answer': unicode(answer)}]
+ return [{'answer': str(answer)}]
# required answerer function
diff --git a/searx/autocomplete.py b/searx/autocomplete.py
index 00a9f9553..9bc6a98f2 100644
--- a/searx/autocomplete.py
+++ b/searx/autocomplete.py
@@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
-import sys
from lxml import etree
from json import loads
+from urllib.parse import urlencode
+
from searx import settings
from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
-
-if sys.version_info[0] == 3:
- unicode = str
def get(*args, **kwargs):
@@ -85,22 +82,22 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes:
- lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+ lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id
if lang_id.startswith(engine_query):
if len(engine_query) <= 2:
- results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+ results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else:
- results.append(u':{lang_id}'.format(lang_id=lang_id))
+ results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name
if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
- results.append(u':{lang_name}'.format(lang_name=lang_name))
+ results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country
if country.startswith(engine_query.replace('_', ' ')):
- results.append(u':{country}'.format(country=country.replace(' ', '_')))
+ results.append(':{country}'.format(country=country.replace(' ', '_')))
# remove duplicates
result_set = set(results)
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py
index 0de04bd95..76a7a1634 100644
--- a/searx/engines/1337x.py
+++ b/searx/engines/1337x.py
@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin
+
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py
index cca28f0db..d5d3e3178 100644
--- a/searx/engines/acgsou.py
+++ b/searx/engines/acgsou.py
@@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile
"""
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero
# engine dependent config
@@ -63,7 +63,7 @@ def response(resp):
except:
pass
# I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
- content = u'Category: "{category}".'
+ content = 'Category: "{category}".'
content = content.format(category=category)
results.append({'url': href,
diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py
index f2ee12b29..4e6dcd486 100644
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -9,9 +9,10 @@
@parse url, title, thumbnail_src
"""
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
# engine dependent config
categories = ['it']
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index dce862f55..e2f44b0f5 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -11,9 +11,9 @@
@parse url, title
"""
+from urllib.parse import urlencode, urljoin
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
@@ -105,7 +105,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
- query += b' (' + main_langs[language] + b')'
+ query += ' (' + main_langs[language] + ')'
# prepare the request parameters
query = urlencode({'search': query})
diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py
index e3c871d17..77ddc572e 100644
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -11,9 +11,9 @@
More info on api: https://arxiv.org/help/api/user-manual
"""
+from urllib.parse import urlencode
from lxml import html
from datetime import datetime
-from searx.url_utils import urlencode
categories = ['science']
@@ -30,7 +30,7 @@ def request(query, params):
# basic search
offset = (params['pageno'] - 1) * number_of_results
- string_args = dict(query=query.decode('utf-8'),
+ string_args = dict(query=query,
offset=offset,
number_of_results=number_of_results)
diff --git a/searx/engines/base.py b/searx/engines/base.py
index f1b1cf671..0114f9798 100755
--- a/searx/engines/base.py
+++ b/searx/engines/base.py
@@ -13,10 +13,10 @@
More info on api: http://base-search.net/about/download/base_interface.pdf
"""
+from urllib.parse import urlencode
from lxml import etree
from datetime import datetime
import re
-from searx.url_utils import urlencode
from searx.utils import searx_useragent
@@ -55,7 +55,7 @@ shorcut_dict = {
def request(query, params):
# replace shortcuts with API advanced search keywords
for key in shorcut_dict.keys():
- query = re.sub(key, shorcut_dict[key], str(query))
+ query = re.sub(key, shorcut_dict[key], query)
# basic search
offset = (params['pageno'] - 1) * number_of_results
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index afb776acd..c7b619369 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -14,10 +14,10 @@
"""
import re
+from urllib.parse import urlencode
from lxml import html
from searx import logger, utils
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
from searx.utils import match_language, gen_useragent, eval_xpath
logger = logger.getChild('bing engine')
@@ -47,7 +47,7 @@ def request(query, params):
else:
lang = match_language(params['language'], supported_languages, language_aliases)
- query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+ query = 'language:{} {}'.format(lang.split('-')[0].upper(), query)
search_path = search_string.format(
query=urlencode({'q': query}),
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 93b25008c..10da42b5c 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -12,10 +12,10 @@
"""
+from urllib.parse import urlencode
from lxml import html
from json import loads
import re
-from searx.url_utils import urlencode
from searx.utils import match_language
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@@ -91,7 +91,7 @@ def response(resp):
# strip 'Unicode private use area' highlighting, they render to Tux
# the Linux penguin and a standing diamond on my machine...
- title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+ title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
results.append({'template': 'images.html',
'url': m['purl'],
'thumbnail_src': m['turl'],
diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py
index d13be777c..fbe51faed 100644
--- a/searx/engines/bing_news.py
+++ b/searx/engines/bing_news.py
@@ -13,10 +13,9 @@
from datetime import datetime
from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
from lxml import etree
from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl
-
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
# engine dependent config
diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py
index f048f0d8e..63264de6f 100644
--- a/searx/engines/bing_videos.py
+++ b/searx/engines/bing_videos.py
@@ -12,7 +12,7 @@
from json import loads
from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import match_language
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index 82eedc24b..2faade3e2 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -12,8 +12,8 @@
from lxml import html
from operator import itemgetter
+from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size
# engine dependent config
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index 8eab8f673..c6067c4a8 100644
--- a/searx/engines/currency_convert.py
+++ b/searx/engines/currency_convert.py
@@ -1,26 +1,23 @@
import json
import re
import os
-import sys
import unicodedata
from io import open
from datetime import datetime
-if sys.version_info[0] == 3:
- unicode = str
categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100
-parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
+parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
- name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+ name = name.lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
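The pattern and the query it parses are both str now; a quick check of the regex kept by this hunk, on a sample query (not from the repo):

    import re

    parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)

    m = parser_re.match('100 euro in dollar')
    if m:
        amount, from_cur, to_cur = m.groups()
        print(amount, from_cur, to_cur)   # -> 100 euro dollar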
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 1038e64bf..1e24e41da 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -14,7 +14,7 @@
from json import loads
from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import match_language, html_to_text
# engine dependent config
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index af63478fb..48c0429a7 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
if url.startswith('http://'):
url = 'https' + url[4:]
- content = u'{} - {} - {}'.format(
+ content = '{} - {} - {}'.format(
result['artist']['name'],
result['album']['title'],
result['title'])
diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py
index a0e27e622..2bd21fa5d 100644
--- a/searx/engines/deviantart.py
+++ b/searx/engines/deviantart.py
@@ -14,8 +14,9 @@
from lxml import html
import re
+from urllib.parse import urlencode
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
# engine dependent config
categories = ['images']
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 423af0971..5a1fea3cf 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -10,15 +10,15 @@
"""
import re
+from urllib.parse import urljoin
from lxml import html
from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin
categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
-parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
@@ -37,7 +37,7 @@ def request(query, params):
params['url'] = url.format(from_lang=from_lang[2],
to_lang=to_lang[2],
- query=query.decode('utf-8'))
+ query=query)
return params
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index ff2f94593..e2c0389c6 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -10,14 +10,11 @@
@parse url, title, content, magnetlink
"""
-from sys import version_info
+from urllib.parse import urljoin
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
-if version_info[0] == 3:
- unicode = str
categories = ['videos', 'music', 'files']
paging = True
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 073410eb0..24a932d53 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -14,8 +14,8 @@ import random
import string
from dateutil import parser
from json import loads
+from urllib.parse import urlencode
from lxml import html
-from searx.url_utils import urlencode
from datetime import datetime
# engine dependent config
diff --git a/searx/engines/doku.py b/searx/engines/doku.py
index d20e66026..513ffda89 100644
--- a/searx/engines/doku.py
+++ b/searx/engines/doku.py
@@ -9,10 +9,10 @@
# @stable yes
# @parse (general) url, title, content
+from urllib.parse import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
from searx.utils import eval_xpath
-from searx.url_utils import urlencode
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py
index 6e07b5021..fb1ea2b2d 100644
--- a/searx/engines/duckduckgo.py
+++ b/searx/engines/duckduckgo.py
@@ -15,9 +15,9 @@
from lxml.html import fromstring
from json import loads
+from urllib.parse import urlencode
from searx.engines.xpath import extract_text
from searx.poolrequests import get
-from searx.url_utils import urlencode
from searx.utils import match_language, eval_xpath
# engine dependent config
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py
index 79d10c303..73154a525 100644
--- a/searx/engines/duckduckgo_definitions.py
+++ b/searx/engines/duckduckgo_definitions.py
@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
"""
import json
+from urllib.parse import urlencode
from lxml import html
from re import compile
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
from searx.utils import html_to_text, match_language
url = 'https://api.duckduckgo.com/'\
diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py
index 89924b71c..38e141f8b 100644
--- a/searx/engines/duckduckgo_images.py
+++ b/searx/engines/duckduckgo_images.py
@@ -14,13 +14,13 @@
"""
from json import loads
+from urllib.parse import urlencode
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import (
_fetch_supported_languages, supported_languages_url,
get_region_code, language_aliases
)
from searx.poolrequests import get
-from searx.url_utils import urlencode
# engine dependent config
categories = ['images']
diff --git a/searx/engines/duden.py b/searx/engines/duden.py
index cf2f1a278..a711f422e 100644
--- a/searx/engines/duden.py
+++ b/searx/engines/duden.py
@@ -10,9 +10,9 @@
from lxml import html, etree
import re
+from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text
from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
from searx import logger
categories = ['general']
diff --git a/searx/engines/etools.py b/searx/engines/etools.py
index a9eb0980d..efc102ef6 100644
--- a/searx/engines/etools.py
+++ b/searx/engines/etools.py
@@ -10,8 +10,8 @@
"""
from lxml import html
+from urllib.parse import quote
from searx.engines.xpath import extract_text
-from searx.url_utils import quote
from searx.utils import eval_xpath
categories = ['general']
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index 4066dc716..a2a5114df 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -9,9 +9,9 @@
@parse url, title, content
"""
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
# engine dependent config
categories = ['files']
diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py
index ed57a6bf3..eef5be6e8 100644
--- a/searx/engines/filecrop.py
+++ b/searx/engines/filecrop.py
@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
+from html.parser import HTMLParser
+from urllib.parse import urlencode
-try:
- from HTMLParser import HTMLParser
-except:
- from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index de1769370..b23c447b8 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -14,7 +14,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
categories = ['images']
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 1cbb3e0a9..4bcf837cb 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -15,8 +15,8 @@
from json import loads
from time import time
import re
+from urllib.parse import urlencode
from searx.engines import logger
-from searx.url_utils import urlencode
from searx.utils import ecma_unescape, html_to_text
logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
'img_format': img_format,
'template': 'images.html'
}
- result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
- result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
- result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
- result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+ result['author'] = author.encode(errors='ignore').decode()
+ result['source'] = source.encode(errors='ignore').decode()
+ result['title'] = title.encode(errors='ignore').decode()
+ result['content'] = content.encode(errors='ignore').decode()
results.append(result)
return results
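encode()/decode() default to UTF-8, so the new calls are equivalent to the old explicit 'utf-8' arguments; errors='ignore' still drops whatever cannot be encoded, e.g. lone surrogates from badly decoded upstream data. A sketch with an illustrative value:

    title = 'photo \udcff title'                     # lone surrogate, not encodable
    clean = title.encode(errors='ignore').decode()   # utf-8 is the default codec
    print(clean)                                     # -> photo  title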
diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py
index f3441fa5f..14b659b5f 100644
--- a/searx/engines/framalibre.py
+++ b/searx/engines/framalibre.py
@@ -10,13 +10,10 @@
@parse url, title, content, thumbnail, img_src
"""
-try:
- from cgi import escape
-except:
- from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode
# engine dependent config
categories = ['it']
diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py
index a67b42dbe..5b174a687 100644
--- a/searx/engines/frinkiac.py
+++ b/searx/engines/frinkiac.py
@@ -10,7 +10,7 @@ Frinkiac (Images)
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
categories = ['images']
diff --git a/searx/engines/genius.py b/searx/engines/genius.py
index aa5afad9b..feb7d79d1 100644
--- a/searx/engines/genius.py
+++ b/searx/engines/genius.py
@@ -11,7 +11,7 @@ Genius
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from datetime import datetime
# engine dependent config
diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py
index a7a966cc9..b6bc99fab 100644
--- a/searx/engines/gentoo.py
+++ b/searx/engines/gentoo.py
@@ -11,9 +11,9 @@
@parse url, title
"""
+from urllib.parse import urlencode, urljoin
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language
if language in main_langs:
- query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+ query += ' (' + main_langs[language] + ')'
# prepare the request parameters
query = urlencode({'search': query})
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index b139c2a9f..1d71b18e9 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -14,8 +14,8 @@
import re
from json import loads
+from urllib.parse import urlencode
# from searx import logger
-from searx.url_utils import urlencode
from searx.poolrequests import get
# engine dependent config
diff --git a/searx/engines/github.py b/searx/engines/github.py
index eaa00da4f..80b50ceda 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['it']
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 093ad6bd7..dfc8a0ab8 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -18,11 +18,11 @@ Definitions`_.
# pylint: disable=invalid-name, missing-function-docstring
+from urllib.parse import urlencode, urlparse
from lxml import html
from flask_babel import gettext
from searx.engines.xpath import extract_text
from searx import logger
-from searx.url_utils import urlencode, urlparse
from searx.utils import match_language, eval_xpath
logger = logger.getChild('google engine')
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index f0e9e27e3..9dd5fad2c 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -24,11 +24,10 @@ Definitions`_.
"""
-import urllib
+from urllib.parse import urlencode, urlparse, unquote
from lxml import html
from flask_babel import gettext
from searx import logger
-from searx.url_utils import urlencode, urlparse
from searx.utils import eval_xpath
from searx.engines.xpath import extract_text
@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
if 'gstatic.com/images' in line and data_id in line:
url_line = _script[i + 1]
img_url = url_line.split('"')[1]
- img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+ img_url = unquote(img_url.replace(r'\u00', r'%'))
return img_url
diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
index c9cc75435..08875328c 100644
--- a/searx/engines/google_news.py
+++ b/searx/engines/google_news.py
@@ -10,9 +10,9 @@
@parse url, title, content, publishedDate
"""
+from urllib.parse import urlencode
from lxml import html
from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
from searx.utils import match_language
# search-url
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index fd6b2e3be..08af55902 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -12,9 +12,9 @@
from datetime import date, timedelta
from json import loads
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
import re
# engine dependent config
diff --git a/searx/engines/ina.py b/searx/engines/ina.py
index ea509649f..cce580273 100644
--- a/searx/engines/ina.py
+++ b/searx/engines/ina.py
@@ -12,15 +12,12 @@
# @todo embedded (needs some md5 from video page)
from json import loads
+from urllib.parse import urlencode
from lxml import html
from dateutil import parser
+from html.parser import HTMLParser
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
-try:
- from HTMLParser import HTMLParser
-except:
- from html.parser import HTMLParser
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py
index cf76fd215..6ea942699 100644
--- a/searx/engines/invidious.py
+++ b/searx/engines/invidious.py
@@ -8,7 +8,7 @@
# @stable yes
# @parse url, title, content, publishedDate, thumbnail, embedded, author, length
-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
from dateutil import parser
import time
diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py
index 785b0c490..1e5c39ac4 100644
--- a/searx/engines/json_engine.py
+++ b/searx/engines/json_engine.py
@@ -1,11 +1,8 @@
from collections import Iterable
from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import to_string
-if version_info[0] == 3:
- unicode = str
search_url = None
url_query = None
@@ -37,8 +34,6 @@ def iterate(iterable):
def is_iterable(obj):
if type(obj) == str:
return False
- if type(obj) == unicode:
- return False
return isinstance(obj, Iterable)
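With a single str type there is nothing left for the removed unicode branch to do, so the check reduces to one isinstance test. A self-contained sketch (using collections.abc, the non-deprecated home of Iterable, unlike the context line above):

    from collections.abc import Iterable

    def is_iterable(obj):
        # py3 has one string type, so the old str/unicode pair is a single check
        if isinstance(obj, str):
            return False
        return isinstance(obj, Iterable)

    print(is_iterable('abc'), is_iterable(['a', 'b']))   # -> False True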
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 5e897c96f..af48d990b 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -12,9 +12,9 @@
from lxml import html
from operator import itemgetter
+from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']
diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py
index 0607ac93b..50ba74efc 100644
--- a/searx/engines/mediawiki.py
+++ b/searx/engines/mediawiki.py
@@ -14,7 +14,7 @@
from json import loads
from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote
# engine dependent config
categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
if result.get('snippet', '').startswith('#REDIRECT'):
continue
url = base_url.format(language=resp.search_params['language']) +\
- 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+ 'wiki/' + quote(result['title'].replace(' ', '_').encode())
# append result
results.append({'url': url,
diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py
index 9bac0069c..7426eef7e 100644
--- a/searx/engines/microsoft_academic.py
+++ b/searx/engines/microsoft_academic.py
@@ -12,8 +12,7 @@ Microsoft Academic (Science)
from datetime import datetime
from json import loads
from uuid import uuid4
-
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import html_to_text
categories = ['images']
diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py
index 470c007ea..0606350a9 100644
--- a/searx/engines/mixcloud.py
+++ b/searx/engines/mixcloud.py
@@ -12,7 +12,7 @@
from json import loads
from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['music']
diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py
index c57979a5f..ed8897ddc 100644
--- a/searx/engines/nyaa.py
+++ b/searx/engines/nyaa.py
@@ -10,8 +10,8 @@
"""
from lxml import html
+from urllib.parse import urlencode
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero
# engine dependent config
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 257b1a1b3..5475c7a6d 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
# do search-request
def request(query, params):
- params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
- params['route'] = route_re.match(query.decode('utf-8'))
+ params['url'] = base_url + search_string.format(query=query)
+ params['route'] = route_re.match(query)
return params
@@ -52,7 +52,7 @@ def response(resp):
if 'display_name' not in r:
continue
- title = r['display_name'] or u''
+ title = r['display_name'] or ''
osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type,
osm_id=r['osm_id'])
@@ -64,7 +64,7 @@ def response(resp):
# if no geojson is found and osm_type is a node, add geojson Point
if not geojson and osm_type == 'node':
- geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+ geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address')
address = {}
diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py
index b3795bf83..58ff38c02 100644
--- a/searx/engines/peertube.py
+++ b/searx/engines/peertube.py
@@ -14,7 +14,7 @@
from json import loads
from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import html_to_text
# engine dependent config
diff --git a/searx/engines/photon.py b/searx/engines/photon.py
index 15236f680..9201fc168 100644
--- a/searx/engines/photon.py
+++ b/searx/engines/photon.py
@@ -11,8 +11,8 @@
"""
from json import loads
+from urllib.parse import urlencode
from searx.utils import searx_useragent
-from searx.url_utils import urlencode
# engine dependent config
categories = ['map']
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index 0122d6daa..42866d058 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -11,7 +11,9 @@
from json import loads
from datetime import datetime
from operator import itemgetter
-from searx.url_utils import quote
+
+from urllib.parse import quote, urljoin
+from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
# engine dependent config
@@ -62,8 +64,8 @@ def response(resp):
# parse results
for result in search_res:
link = url + "description.php?id=" + result["id"]
- magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
- "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+ magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+ + "&tr=" + "&tr=".join(trackers)
params = {
"url": link,
diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py
index 055f09226..7eb2e92f9 100644
--- a/searx/engines/pubmed.py
+++ b/searx/engines/pubmed.py
@@ -14,7 +14,7 @@
from flask_babel import gettext
from lxml import etree
from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.poolrequests import get
diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py
index 54e9dafad..ac918b905 100644
--- a/searx/engines/qwant.py
+++ b/searx/engines/qwant.py
@@ -12,9 +12,9 @@
from datetime import datetime
from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language
+
# engine dependent config
categories = None
diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py
index d19724906..e732875cb 100644
--- a/searx/engines/reddit.py
+++ b/searx/engines/reddit.py
@@ -12,7 +12,7 @@
import json
from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py
index 7208dcb70..6dbbf4fd9 100644
--- a/searx/engines/scanr_structures.py
+++ b/searx/engines/scanr_structures.py
@@ -11,7 +11,7 @@
"""
from json import loads, dumps
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
params['url'] = search_url
params['method'] = 'POST'
params['headers']['Content-type'] = "application/json"
- params['data'] = dumps({"query": query.decode('utf-8'),
+ params['data'] = dumps({"query": query,
"searchField": "ALL",
"sortDirection": "ASC",
"sortOrder": "RELEVANCY",
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 789e8e7a9..706285814 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index 4b8e9a84a..878d2e792 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['it']
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
index f9b1f99c8..3778abe7b 100644
--- a/searx/engines/seedpeer.py
+++ b/searx/engines/seedpeer.py
@@ -11,7 +11,7 @@
from lxml import html
from json import loads
from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index 284689bf6..5165ea3ea 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -14,14 +14,11 @@ import re
from json import loads
from lxml import html
from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
from searx import logger
from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
-try:
- from cStringIO import StringIO
-except:
- from io import StringIO
# engine dependent config
categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
# gets app_js and searches for the clientid
response = http_get(app_js_url)
if response.ok:
- cids = cid_re.search(response.content.decode("utf-8"))
+ cids = cid_re.search(response.content.decode())
if cids is not None and len(cids.groups()):
return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index 00c395706..74942326e 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -11,7 +11,7 @@
"""
from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
import requests
import base64
@@ -39,8 +39,8 @@ def request(query, params):
'https://accounts.spotify.com/api/token',
data={'grant_type': 'client_credentials'},
headers={'Authorization': 'Basic ' + base64.b64encode(
- "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
- ).decode('utf-8')}
+ "{}:{}".format(api_client_id, api_client_secret).encode()
+ ).decode()}
)
j = loads(r.text)
params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -59,7 +59,7 @@ def response(resp):
if result['type'] == 'track':
title = result['name']
url = result['external_urls']['spotify']
- content = u'{} - {} - {}'.format(
+ content = '{} - {} - {}'.format(
result['artists'][0]['name'],
result['album']['name'],
result['name'])
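The encode()/decode() pair builds the HTTP Basic credentials for Spotify's client-credentials token request, now with the default UTF-8 codec. With placeholder credentials:

    import base64

    api_client_id, api_client_secret = 'my-client-id', 'my-client-secret'  # placeholders
    token = base64.b64encode(
        "{}:{}".format(api_client_id, api_client_secret).encode()).decode()
    headers = {'Authorization': 'Basic ' + token}
    print(headers)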
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index 25875aa15..90e4543d7 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -10,9 +10,9 @@
@parse url, title, content
"""
+from urllib.parse import urlencode, urljoin
from lxml import html
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py
index 773212043..9c8774d7c 100644
--- a/searx/engines/tokyotoshokan.py
+++ b/searx/engines/tokyotoshokan.py
@@ -11,10 +11,10 @@
"""
import re
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime
-from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero
# engine dependent config
diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py
index c5e515acf..fcc8c042c 100644
--- a/searx/engines/torrentz.py
+++ b/searx/engines/torrentz.py
@@ -12,10 +12,10 @@
"""
import re
+from urllib.parse import urlencode
from lxml import html
from datetime import datetime
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
from searx.utils import get_torrent_size
# engine dependent config
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index 6cb18ff39..a50e7c830 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -12,11 +12,11 @@ import re
from searx.utils import is_valid_lang
categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100
-parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
+parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
api_key = ''
@@ -39,9 +39,9 @@ def request(query, params):
key_form = ''
params['url'] = url.format(from_lang=from_lang[1],
to_lang=to_lang[1],
- query=query.decode('utf-8'),
+ query=query,
key=key_form)
- params['query'] = query.decode('utf-8')
+ params['query'] = query
params['from_lang'] = from_lang
params['to_lang'] = to_lang
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index d2a8d2088..549b14e96 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -12,10 +12,10 @@
@todo publishedDate
"""
+from urllib.parse import urlencode, urljoin
from lxml import html
from datetime import datetime
from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['social media']
diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py
index 2e8d6fdfc..45c6b30da 100644
--- a/searx/engines/unsplash.py
+++ b/searx/engines/unsplash.py
@@ -10,7 +10,7 @@
@parse url, title, img_src, thumbnail_src
"""
-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import loads
url = 'https://unsplash.com/'
diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py
index a92271019..fd3abc858 100644
--- a/searx/engines/vimeo.py
+++ b/searx/engines/vimeo.py
@@ -12,9 +12,9 @@
# @todo rewrite to api
# @todo set content-parameter with correct data
+from urllib.parse import urlencode
from json import loads
from dateutil import parser
-from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']
diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index eb7e1dc71..ffa3724fd 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -15,9 +15,9 @@ from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
from searx.utils import match_language, eval_xpath
+from urllib.parse import urlencode
from json import loads
from lxml.html import fromstring
from lxml import etree
@@ -76,7 +76,7 @@ def request(query, params):
def response(resp):
results = []
htmlparser = etree.HTMLParser()
- html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+ html = fromstring(resp.content.decode(), parser=htmlparser)
search_results = eval_xpath(html, wikidata_ids_xpath)
if resp.search_params['language'].split('-')[0] == 'all':
@@ -89,7 +89,7 @@ def response(resp):
wikidata_id = search_result.split('/')[-1]
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
htmlresponse = get(url)
- jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+ jsonresponse = loads(htmlresponse.content.decode())
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)
return results
@@ -453,16 +453,16 @@ def get_geolink(result):
latitude, longitude = coordinates.split(',')
# convert to decimal
- lat = int(latitude[:latitude.find(u'°')])
+ lat = int(latitude[:latitude.find('°')])
if latitude.find('\'') >= 0:
- lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+ lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
if latitude.find('"') >= 0:
lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
if latitude.find('S') >= 0:
lat *= -1
- lon = int(longitude[:longitude.find(u'°')])
+ lon = int(longitude[:longitude.find('°')])
if longitude.find('\'') >= 0:
- lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+ lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
if longitude.find('"') >= 0:
lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
if longitude.find('W') >= 0:
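The degree/minute/second parsing is unchanged apart from the dropped u'' prefixes; the same arithmetic on a sample coordinate string (illustrative value):

    latitude = '48°51\'24"N'                 # degrees, minutes, seconds

    lat = int(latitude[:latitude.find('°')])
    if latitude.find('\'') >= 0:
        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
    if latitude.find('"') >= 0:
        lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
    print(round(lat, 4))                     # -> 48.8567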
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
index bff24d16b..620ec3c14 100644
--- a/searx/engines/wikipedia.py
+++ b/searx/engines/wikipedia.py
@@ -10,13 +10,13 @@
@parse url, infobox
"""
+from urllib.parse import quote
from json import loads
from lxml.html import fromstring
-from searx.url_utils import quote
from searx.utils import match_language, searx_useragent
# search-url
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 1c58c4a9b..520eaa209 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -9,7 +9,7 @@
# @parse url, infobox
from lxml import etree
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -45,15 +45,15 @@ def request(query, params):
# replace private user area characters to make text legible
def replace_pua_chars(text):
- pua_chars = {u'\uf522': u'\u2192', # rigth arrow
- u'\uf7b1': u'\u2115', # set of natural numbers
- u'\uf7b4': u'\u211a', # set of rational numbers
- u'\uf7b5': u'\u211d', # set of real numbers
- u'\uf7bd': u'\u2124', # set of integer numbers
- u'\uf74c': 'd', # differential
- u'\uf74d': u'\u212f', # euler's number
- u'\uf74e': 'i', # imaginary number
- u'\uf7d9': '='} # equals sign
+ pua_chars = {'\uf522': '\u2192', # rigth arrow
+ '\uf7b1': '\u2115', # set of natural numbers
+ '\uf7b4': '\u211a', # set of rational numbers
+ '\uf7b5': '\u211d', # set of real numbers
+ '\uf7bd': '\u2124', # set of integer numbers
+ '\uf74c': 'd', # differential
+ '\uf74d': '\u212f', # euler's number
+ '\uf74e': 'i', # imaginary number
+ '\uf7d9': '='} # equals sign
for k, v in pua_chars.items():
text = text.replace(k, v)
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 387c9fa17..943d4f3fb 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -10,9 +10,9 @@
from json import loads
from time import time
+from urllib.parse import urlencode
from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
# search-url
url = 'https://www.wolframalpha.com/'
diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py
index f1154b16d..1cb74dbad 100644
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -11,7 +11,7 @@
"""
from lxml import html
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin
from searx.engines.xpath import extract_text
# engine dependent config
diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py
index a9f3e4bdd..bd97a93a5 100644
--- a/searx/engines/xpath.py
+++ b/searx/engines/xpath.py
@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
from lxml import html
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text, eval_xpath
-from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None
url_xpath = None
@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
if url.startswith('//'):
# add http or https to this kind of url //example.com/
parsed_search_url = urlparse(search_url)
- url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+ url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
elif url.startswith('/'):
# fix relative url to the search engine
url = urljoin(search_url, url)
@@ -86,7 +86,7 @@ def normalize_url(url):
p = parsed_url.path
mark = p.find('/**')
if mark != -1:
- return unquote(p[mark + 3:]).decode('utf-8')
+ return unquote(p[mark + 3:]).decode()
return url
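extract_url's handling of protocol-relative and relative results works the same with the plain format string; a sketch of the two branches on sample URLs:

    from urllib.parse import urljoin, urlparse

    search_url = 'https://example.org/search?q=x'

    for url in ('//example.com/page', '/page'):
        if url.startswith('//'):
            # inherit the scheme of the search url
            url = '{0}:{1}'.format(urlparse(search_url).scheme or 'http', url)
        elif url.startswith('/'):
            # make it absolute relative to the engine
            url = urljoin(search_url, url)
        print(url)   # -> https://example.com/page, then https://example.org/page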
diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py
index f1d4c6abe..daa151082 100644
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -14,7 +14,7 @@
from json import loads
from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
from searx.utils import html_to_text
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
index a6b4aeb9f..0133b57b5 100644
--- a/searx/engines/yahoo.py
+++ b/searx/engines/yahoo.py
@@ -11,9 +11,9 @@
@parse url, title, content, suggestion
"""
+from urllib.parse import unquote, urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
-from searx.url_utils import unquote, urlencode
from searx.utils import match_language, eval_xpath
# engine dependent config
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
index 9f6a4159b..345e4d91f 100644
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -11,13 +11,13 @@
import re
from datetime import datetime, timedelta
+from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import (
parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
)
from dateutil import parser
-from searx.url_utils import urlencode
from searx.utils import match_language
# engine dependent config
@@ -58,7 +58,7 @@ def request(query, params):
def sanitize_url(url):
if ".yahoo.com/" in url:
- return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+ return re.sub("\\;\\_ylt\\=.+$", "", url)
else:
return url
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index 1c789f6cb..ff1ef5a26 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -9,9 +9,9 @@
@parse url, title, content
"""
+from urllib.parse import urlencode
from lxml import html
from searx import logger
-from searx.url_utils import urlencode
logger = logger.getChild('yandex engine')
diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py
index 739574e8d..37bf3b1d9 100644
--- a/searx/engines/yggtorrent.py
+++ b/searx/engines/yggtorrent.py
@@ -11,8 +11,8 @@
from lxml import html
from operator import itemgetter
from datetime import datetime
+from urllib.parse import quote
from searx.engines.xpath import extract_text
-from searx.url_utils import quote
from searx.utils import get_torrent_size
from searx.poolrequests import get as http_get
diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py
index bc4c0d58e..2542169a6 100644
--- a/searx/engines/youtube_api.py
+++ b/searx/engines/youtube_api.py
@@ -10,7 +10,7 @@
from json import loads
from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
# engine dependent config
categories = ['videos', 'music']
diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py
index 68a3739a2..fef501458 100644
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -10,9 +10,9 @@
from functools import reduce
from json import loads
+from urllib.parse import quote_plus
from searx.engines.xpath import extract_text
from searx.utils import list_get
-from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos', 'music']
diff --git a/searx/exceptions.py b/searx/exceptions.py
index 0175acfa3..4af816272 100644
--- a/searx/exceptions.py
+++ b/searx/exceptions.py
@@ -27,7 +27,7 @@ class SearxParameterException(SearxException):
message = 'Empty ' + name + ' parameter'
else:
message = 'Invalid value "' + value + '" for parameter ' + name
- super(SearxParameterException, self).__init__(message)
+ super().__init__(message)
self.message = message
self.parameter_name = name
self.parameter_value = value
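Python 3 accepts the zero-argument form of super() inside a method body, which is why the explicit class and instance arguments can be dropped here and in the hunks below. A sketch of the equivalence:

class Base(Exception):
    def __init__(self, message):
        super().__init__(message)
        self.message = message

class Derived(Base):
    def __init__(self, message):
        # identical to super(Derived, self).__init__(message), Python 3 only
        super().__init__(message)

print(Derived('invalid value').message)  # prints: invalid value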
diff --git a/searx/external_bang.py b/searx/external_bang.py
index 0b4c4ae16..92b6e6a09 100644
--- a/searx/external_bang.py
+++ b/searx/external_bang.py
@@ -23,7 +23,7 @@ def get_bang_url(search_query):
"""
if search_query.external_bang:
- query = search_query.query.decode('utf-8', 'ignore')
+ query = search_query.query
bang = _get_bang(search_query.external_bang)
if bang and query:
diff --git a/searx/languages.py b/searx/languages.py
index 72e1a735e..7fd96ab1e 100644
--- a/searx/languages.py
+++ b/searx/languages.py
@@ -3,73 +3,73 @@
# this file is generated automatically by utils/update_search_languages.py
language_codes = (
- (u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
- (u"ar-SA", u"العربية", u"", u"Arabic"),
- (u"be-BY", u"Беларуская", u"", u"Belarusian"),
- (u"bg-BG", u"Български", u"", u"Bulgarian"),
- (u"ca-AD", u"Català", u"", u"Catalan"),
- (u"cs-CZ", u"Čeština", u"", u"Czech"),
- (u"da-DK", u"Dansk", u"", u"Danish"),
- (u"de", u"Deutsch", u"", u"German"),
- (u"de-AT", u"Deutsch", u"Österreich", u"German"),
- (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
- (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
- (u"el-GR", u"Ελληνικά", u"", u"Greek"),
- (u"en", u"English", u"", u"English"),
- (u"en-AU", u"English", u"Australia", u"English"),
- (u"en-CA", u"English", u"Canada", u"English"),
- (u"en-GB", u"English", u"United Kingdom", u"English"),
- (u"en-IE", u"English", u"Ireland", u"English"),
- (u"en-IN", u"English", u"India", u"English"),
- (u"en-NZ", u"English", u"New Zealand", u"English"),
- (u"en-PH", u"English", u"Philippines", u"English"),
- (u"en-SG", u"English", u"Singapore", u"English"),
- (u"en-US", u"English", u"United States", u"English"),
- (u"es", u"Español", u"", u"Spanish"),
- (u"es-AR", u"Español", u"Argentina", u"Spanish"),
- (u"es-CL", u"Español", u"Chile", u"Spanish"),
- (u"es-ES", u"Español", u"España", u"Spanish"),
- (u"es-MX", u"Español", u"México", u"Spanish"),
- (u"et-EE", u"Eesti", u"", u"Estonian"),
- (u"fa-IR", u"فارسی", u"", u"Persian"),
- (u"fi-FI", u"Suomi", u"", u"Finnish"),
- (u"fr", u"Français", u"", u"French"),
- (u"fr-BE", u"Français", u"Belgique", u"French"),
- (u"fr-CA", u"Français", u"Canada", u"French"),
- (u"fr-CH", u"Français", u"Suisse", u"French"),
- (u"fr-FR", u"Français", u"France", u"French"),
- (u"he-IL", u"עברית", u"", u"Hebrew"),
- (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
- (u"hu-HU", u"Magyar", u"", u"Hungarian"),
- (u"hy-AM", u"Հայերեն", u"", u"Armenian"),
- (u"id-ID", u"Indonesia", u"", u"Indonesian"),
- (u"is-IS", u"Íslenska", u"", u"Icelandic"),
- (u"it-IT", u"Italiano", u"", u"Italian"),
- (u"ja-JP", u"日本語", u"", u"Japanese"),
- (u"ko-KR", u"한국어", u"", u"Korean"),
- (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
- (u"lv-LV", u"Latviešu", u"", u"Latvian"),
- (u"ms-MY", u"Melayu", u"", u"Malay"),
- (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
- (u"nl", u"Nederlands", u"", u"Dutch"),
- (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
- (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
- (u"pl-PL", u"Polski", u"", u"Polish"),
- (u"pt", u"Português", u"", u"Portuguese"),
- (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
- (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
- (u"ro-RO", u"Română", u"", u"Romanian"),
- (u"ru-RU", u"Русский", u"", u"Russian"),
- (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
- (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
- (u"sr-RS", u"Srpski", u"", u"Serbian"),
- (u"sv-SE", u"Svenska", u"", u"Swedish"),
- (u"sw-KE", u"Kiswahili", u"", u"Swahili"),
- (u"th-TH", u"ไทย", u"", u"Thai"),
- (u"tr-TR", u"Türkçe", u"", u"Turkish"),
- (u"uk-UA", u"Українська", u"", u"Ukrainian"),
- (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
- (u"zh", u"中文", u"", u"Chinese"),
- (u"zh-CN", u"中文", u"中国", u"Chinese"),
- (u"zh-TW", u"中文", u"台灣", u"Chinese")
+ ("af-NA", "Afrikaans", "", "Afrikaans"),
+ ("ar-SA", "العربية", "", "Arabic"),
+ ("be-BY", "Беларуская", "", "Belarusian"),
+ ("bg-BG", "Български", "", "Bulgarian"),
+ ("ca-AD", "Català", "", "Catalan"),
+ ("cs-CZ", "Čeština", "", "Czech"),
+ ("da-DK", "Dansk", "", "Danish"),
+ ("de", "Deutsch", "", "German"),
+ ("de-AT", "Deutsch", "Österreich", "German"),
+ ("de-CH", "Deutsch", "Schweiz", "German"),
+ ("de-DE", "Deutsch", "Deutschland", "German"),
+ ("el-GR", "Ελληνικά", "", "Greek"),
+ ("en", "English", "", "English"),
+ ("en-AU", "English", "Australia", "English"),
+ ("en-CA", "English", "Canada", "English"),
+ ("en-GB", "English", "United Kingdom", "English"),
+ ("en-IE", "English", "Ireland", "English"),
+ ("en-IN", "English", "India", "English"),
+ ("en-NZ", "English", "New Zealand", "English"),
+ ("en-PH", "English", "Philippines", "English"),
+ ("en-SG", "English", "Singapore", "English"),
+ ("en-US", "English", "United States", "English"),
+ ("es", "Español", "", "Spanish"),
+ ("es-AR", "Español", "Argentina", "Spanish"),
+ ("es-CL", "Español", "Chile", "Spanish"),
+ ("es-ES", "Español", "España", "Spanish"),
+ ("es-MX", "Español", "México", "Spanish"),
+ ("et-EE", "Eesti", "", "Estonian"),
+ ("fa-IR", "فارسی", "", "Persian"),
+ ("fi-FI", "Suomi", "", "Finnish"),
+ ("fr", "Français", "", "French"),
+ ("fr-BE", "Français", "Belgique", "French"),
+ ("fr-CA", "Français", "Canada", "French"),
+ ("fr-CH", "Français", "Suisse", "French"),
+ ("fr-FR", "Français", "France", "French"),
+ ("he-IL", "עברית", "", "Hebrew"),
+ ("hr-HR", "Hrvatski", "", "Croatian"),
+ ("hu-HU", "Magyar", "", "Hungarian"),
+ ("hy-AM", "Հայերեն", "", "Armenian"),
+ ("id-ID", "Indonesia", "", "Indonesian"),
+ ("is-IS", "Íslenska", "", "Icelandic"),
+ ("it-IT", "Italiano", "", "Italian"),
+ ("ja-JP", "日本語", "", "Japanese"),
+ ("ko-KR", "한국어", "", "Korean"),
+ ("lt-LT", "Lietuvių", "", "Lithuanian"),
+ ("lv-LV", "Latviešu", "", "Latvian"),
+ ("ms-MY", "Melayu", "", "Malay"),
+ ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"),
+ ("nl", "Nederlands", "", "Dutch"),
+ ("nl-BE", "Nederlands", "België", "Dutch"),
+ ("nl-NL", "Nederlands", "Nederland", "Dutch"),
+ ("pl-PL", "Polski", "", "Polish"),
+ ("pt", "Português", "", "Portuguese"),
+ ("pt-BR", "Português", "Brasil", "Portuguese"),
+ ("pt-PT", "Português", "Portugal", "Portuguese"),
+ ("ro-RO", "Română", "", "Romanian"),
+ ("ru-RU", "Русский", "", "Russian"),
+ ("sk-SK", "Slovenčina", "", "Slovak"),
+ ("sl-SI", "Slovenščina", "", "Slovenian"),
+ ("sr-RS", "Srpski", "", "Serbian"),
+ ("sv-SE", "Svenska", "", "Swedish"),
+ ("sw-KE", "Kiswahili", "", "Swahili"),
+ ("th-TH", "ไทย", "", "Thai"),
+ ("tr-TR", "Türkçe", "", "Turkish"),
+ ("uk-UA", "Українська", "", "Ukrainian"),
+ ("vi-VN", "Tiếng Việt", "", "Vietnamese"),
+ ("zh", "中文", "", "Chinese"),
+ ("zh-CN", "中文", "中国", "Chinese"),
+ ("zh-TW", "中文", "台灣", "Chinese")
)
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
index 791c40c21..51f6981a2 100644
--- a/searx/plugins/__init__.py
+++ b/searx/plugins/__init__.py
@@ -20,13 +20,10 @@ from importlib import import_module
from os import listdir, makedirs, remove, stat, utime
from os.path import abspath, basename, dirname, exists, join
from shutil import copyfile
-from sys import version_info
from traceback import print_exc
from searx import logger, settings, static_path
-if version_info[0] == 3:
- unicode = str
logger = logger.getChild('plugins')
@@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite,
tracker_url_remover,
vim_hotkeys)
-required_attrs = (('name', (str, unicode)),
- ('description', (str, unicode)),
+required_attrs = (('name', str),
+ ('description', str),
('default_on', bool))
optional_attrs = (('js_dependencies', tuple),
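With the unicode alias gone, every required plugin attribute is validated against a single type. A hedged sketch of how such a check works (the plugin object is illustrative, not a real searx plugin):

required_attrs = (('name', str),
                  ('description', str),
                  ('default_on', bool))

class ExamplePlugin:
    name = 'example'
    description = 'does nothing useful'
    default_on = False

plugin = ExamplePlugin()
for attr_name, attr_type in required_attrs:
    # each required attribute must exist and have the declared type
    assert isinstance(getattr(plugin, attr_name), attr_type)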
diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py
index 82556017e..aeb42495e 100644
--- a/searx/plugins/https_rewrite.py
+++ b/searx/plugins/https_rewrite.py
@@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
import re
-import sys
+from urllib.parse import urlparse
from lxml import etree
from os import listdir, environ
from os.path import isfile, isdir, join
from searx.plugins import logger
from flask_babel import gettext
from searx import searx_dir
-from searx.url_utils import urlparse
-if sys.version_info[0] == 3:
- unicode = str
name = "HTTPS rewrite"
description = gettext('Rewrite HTTP links to HTTPS if possible')
diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py
index be80beb26..eef29f103 100644
--- a/searx/plugins/oa_doi_rewrite.py
+++ b/searx/plugins/oa_doi_rewrite.py
@@ -1,6 +1,6 @@
+from urllib.parse import urlparse, parse_qsl
from flask_babel import gettext
import re
-from searx.url_utils import urlparse, parse_qsl
from searx import settings
diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py
index cdd3e9a6e..4fdfb4288 100644
--- a/searx/plugins/self_info.py
+++ b/searx/plugins/self_info.py
@@ -22,7 +22,7 @@ default_on = True
# Self User Agent regex
-p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
# attach callback to the post search hook
@@ -31,7 +31,7 @@ p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search):
if search.search_query.pageno > 1:
return True
- if search.search_query.query == b'ip':
+ if search.search_query.query == 'ip':
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
if x_forwarded_for:
ip = x_forwarded_for[0]
diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py
index 33dd621e1..742f39013 100644
--- a/searx/plugins/tracker_url_remover.py
+++ b/searx/plugins/tracker_url_remover.py
@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask_babel import gettext
import re
-from searx.url_utils import urlunparse, parse_qsl, urlencode
+from urllib.parse import urlunparse, parse_qsl, urlencode
regexes = {re.compile(r'utm_[^&]+'),
re.compile(r'(wkey|wemail)[^&]*'),
diff --git a/searx/poolrequests.py b/searx/poolrequests.py
index 9f0ee8736..51b6219c3 100644
--- a/searx/poolrequests.py
+++ b/searx/poolrequests.py
@@ -20,7 +20,7 @@ class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter):
self.config = {}
self.proxy_manager = {}
- super(requests.adapters.HTTPAdapter, self).__init__()
+ super().__init__()
self._pool_connections = pool_connections
self._pool_maxsize = pool_maxsize
@@ -60,7 +60,7 @@ else:
class SessionSinglePool(requests.Session):
def __init__(self):
- super(SessionSinglePool, self).__init__()
+ super().__init__()
# reuse the same adapters
with RLock():
@@ -71,7 +71,7 @@ class SessionSinglePool(requests.Session):
def close(self):
"""Call super, but clear adapters since there are managed globaly"""
self.adapters.clear()
- super(SessionSinglePool, self).close()
+ super().close()
def set_timeout_for_thread(timeout, start_time=None):
diff --git a/searx/preferences.py b/searx/preferences.py
index 82b8f5224..3042636a6 100644
--- a/searx/preferences.py
+++ b/searx/preferences.py
@@ -6,16 +6,11 @@
from base64 import urlsafe_b64encode, urlsafe_b64decode
from zlib import compress, decompress
-from sys import version
+from urllib.parse import parse_qs, urlencode
from searx import settings, autocomplete
from searx.languages import language_codes as languages
from searx.utils import match_language
-from searx.url_utils import parse_qs, urlencode
-
-if version[0] == '3':
- # pylint: disable=invalid-name
- unicode = str
COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years
@@ -37,7 +32,7 @@ class ValidationException(Exception):
"""
-class Setting(object):
+class Setting:
"""Base class of user settings"""
def __init__(self, default_value, **kwargs):
@@ -315,7 +310,7 @@ class PluginsSetting(SwitchableSetting):
return [item[len('plugin_'):] for item in items]
-class Preferences(object):
+class Preferences:
"""Validates and saves preferences to cookies"""
def __init__(self, themes, categories, engines, plugins):
@@ -402,14 +397,14 @@ class Preferences(object):
settings_kv['tokens'] = ','.join(self.tokens.values)
- return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
+ return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()
def parse_encoded_data(self, input_data):
"""parse (base64) preferences from request (``flask.request.form['preferences']``)"""
- decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8')))
+ decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
dict_data = {}
for x, y in parse_qs(decoded_data).items():
- dict_data[x.decode('utf8')] = y[0].decode('utf8')
+ dict_data[x.decode()] = y[0].decode()
self.parse_dict(dict_data)
def parse_dict(self, input_data):
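str.encode() and bytes.decode() default to UTF-8 in Python 3, so the explicit 'utf-8' arguments are redundant. The full settings round trip, sketched standalone (the keys are made up; the calls mirror the encoding path and parse_encoded_data above):

from base64 import urlsafe_b64encode, urlsafe_b64decode
from urllib.parse import parse_qs, urlencode
from zlib import compress, decompress

settings_kv = {'language': 'en-US', 'theme': 'oscar'}

# encode, as when saving preferences into a URL parameter
encoded = urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()

# decode, as in parse_encoded_data (parse_qs on bytes yields bytes pairs)
decoded = decompress(urlsafe_b64decode(encoded.encode()))
restored = {x.decode(): y[0].decode() for x, y in parse_qs(decoded).items()}
assert restored == settings_kv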
diff --git a/searx/query.py b/searx/query.py
index e8b57d4ca..ef323af7a 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -17,23 +17,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
'''
+import re
+
from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
-import re
-import sys
-if sys.version_info[0] == 3:
- unicode = str
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
-class RawTextQuery(object):
+class RawTextQuery:
"""parse raw text query (the value from the html input)"""
def __init__(self, query, disabled_engines):
+ assert isinstance(query, str)
self.query = query
self.disabled_engines = []
@@ -53,7 +52,7 @@ class RawTextQuery(object):
self.query_parts = []
# split query, including whitespaces
- raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else b'(\s+)', self.query)
+ raw_query_parts = re.split(r'(\s+)', self.query)
parse_next = True
@@ -93,7 +92,7 @@ class RawTextQuery(object):
# check if any language-code is equal with
# declared language-codes
for lc in language_codes:
- lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+ lang_id, lang_name, country, english_name = map(str.lower, lc)
# if correct language-code is found
# set it as new search-language
@@ -177,15 +176,15 @@ class RawTextQuery(object):
def getFullQuery(self):
# get full query including whitespace
- return u''.join(self.query_parts)
+ return ''.join(self.query_parts)
-class SearchQuery(object):
+class SearchQuery:
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
timeout_limit=None, preferences=None, external_bang=None):
- self.query = query.encode('utf-8')
+ self.query = query
self.engines = engines
self.categories = categories
self.lang = lang
@@ -197,4 +196,4 @@ class SearchQuery(object):
self.external_bang = external_bang
def __str__(self):
- return str(self.query) + ";" + str(self.engines)
+ return self.query + ";" + str(self.engines)
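SearchQuery.query is now guaranteed to be str end to end, which is what lets the webapp hunks further down drop their .decode('utf-8') calls. A reduced sketch of the new invariant (two-field stand-in, not the full class):

class SearchQuery:
    """container for the search parameters, str-only after this change"""
    def __init__(self, query, engines):
        assert isinstance(query, str)  # mirrors the assert added to RawTextQuery
        self.query = query
        self.engines = engines

    def __str__(self):
        return self.query + ";" + str(self.engines)

print(SearchQuery('árvíztűrő tükörfúrógép', ['wikipedia']))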
diff --git a/searx/results.py b/searx/results.py
index df2e3e78d..e4cad2e24 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -1,14 +1,11 @@
import re
-import sys
from collections import defaultdict
from operator import itemgetter
from threading import RLock
+from urllib.parse import urlparse, unquote
from searx import logger
from searx.engines import engines
-from searx.url_utils import urlparse, unquote
-if sys.version_info[0] == 3:
- basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
# return the meaningful length of the content for a result
def result_content_len(content):
- if isinstance(content, basestring):
+ if isinstance(content, str):
return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
else:
return 0
@@ -125,14 +122,14 @@ def result_score(result):
return sum((occurences * weight) / position for position in result['positions'])
-class ResultContainer(object):
+class ResultContainer:
"""docstring for ResultContainer"""
__slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\
'_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url'
def __init__(self):
- super(ResultContainer, self).__init__()
+ super().__init__()
self._merged_results = []
self.infoboxes = []
self.suggestions = set()
@@ -161,11 +158,11 @@ class ResultContainer(object):
self._number_of_results.append(result['number_of_results'])
else:
# standard result (url, title, content)
- if 'url' in result and not isinstance(result['url'], basestring):
+ if 'url' in result and not isinstance(result['url'], str):
logger.debug('result: invalid URL: %s', str(result))
- elif 'title' in result and not isinstance(result['title'], basestring):
+ elif 'title' in result and not isinstance(result['title'], str):
logger.debug('result: invalid title: %s', str(result))
- elif 'content' in result and not isinstance(result['content'], basestring):
+ elif 'content' in result and not isinstance(result['content'], str):
logger.debug('result: invalid content: %s', str(result))
else:
self._merge_result(result, standard_result_count + 1)
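basestring disappears with Python 2, so str is the only text type left to validate against. A compact sketch of the field check above:

def looks_valid(result):
    # url/title/content must be str when present (basestring no longer exists)
    return all(isinstance(result[field], str)
               for field in ('url', 'title', 'content') if field in result)

print(looks_valid({'url': 'https://example.org', 'title': 'ok'}))  # True
print(looks_valid({'url': b'https://example.org'}))                # False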
diff --git a/searx/search.py b/searx/search.py
index 79896e5e1..3695128ab 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -20,8 +20,8 @@ import sys
import threading
from time import time
from uuid import uuid4
+from _thread import start_new_thread
-import six
from flask_babel import gettext
import requests.exceptions
import searx.poolrequests as requests_lib
@@ -37,13 +37,6 @@ from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException
-try:
- from thread import start_new_thread
-except:
- from _thread import start_new_thread
-
-if sys.version_info[0] == 3:
- unicode = str
logger = logger.getChild('search')
@@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form):
load_default_categories = True
for pd_name, pd in form.items():
if pd_name == 'categories':
- query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
+ query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
elif pd_name == 'engines':
pd_engines = [{'category': engines[engine].categories[0],
'name': engine}
- for engine in map(unicode.strip, pd.split(',')) if engine in engines]
+ for engine in map(str.strip, pd.split(',')) if engine in engines]
if pd_engines:
query_engines.extend(pd_engines)
load_default_categories = False
@@ -414,12 +407,12 @@ def get_search_query_from_webapp(preferences, form):
raw_text_query)
-class Search(object):
+class Search:
"""Search information container"""
def __init__(self, search_query):
# init vars
- super(Search, self).__init__()
+ super().__init__()
self.search_query = search_query
self.result_container = ResultContainer()
self.actual_timeout = None
@@ -434,7 +427,7 @@ class Search(object):
# This means there was a valid bang and the
# rest of the search does not need to be continued
- if isinstance(self.result_container.redirect_url, six.string_types):
+ if isinstance(self.result_container.redirect_url, str):
return self.result_container
# start time
start_time = time()
@@ -541,13 +534,13 @@ class SearchWithPlugins(Search):
"""Similar to the Search class but call the plugins."""
def __init__(self, search_query, ordered_plugin_list, request):
- super(SearchWithPlugins, self).__init__(search_query)
+ super().__init__(search_query)
self.ordered_plugin_list = ordered_plugin_list
self.request = request
def search(self):
if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self):
- super(SearchWithPlugins, self).search()
+ super().search()
plugins.call(self.ordered_plugin_list, 'post_search', self.request, self)
diff --git a/searx/templates/courgette/404.html b/searx/templates/courgette/404.html
index 9e3b8ac29..7a317f023 100644
--- a/searx/templates/courgette/404.html
+++ b/searx/templates/courgette/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/legacy/404.html b/searx/templates/legacy/404.html
index 3e889dd21..c0fa62b00 100644
--- a/searx/templates/legacy/404.html
+++ b/searx/templates/legacy/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/oscar/404.html b/searx/templates/oscar/404.html
index 5a50880a9..cdb31db73 100644
--- a/searx/templates/oscar/404.html
+++ b/searx/templates/oscar/404.html
@@ -3,7 +3,7 @@
<div class="text-center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/templates/simple/404.html b/searx/templates/simple/404.html
index 11d604313..1a10514cc 100644
--- a/searx/templates/simple/404.html
+++ b/searx/templates/simple/404.html
@@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
- <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
+ <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}
diff --git a/searx/testing.py b/searx/testing.py
index f0e303e13..c52974961 100644
--- a/searx/testing.py
+++ b/searx/testing.py
@@ -17,7 +17,7 @@ from unittest2 import TestCase
class SearxTestLayer:
"""Base layer for non-robot tests."""
- __name__ = u'SearxTestLayer'
+ __name__ = 'SearxTestLayer'
@classmethod
def setUp(cls):
@@ -66,7 +66,7 @@ class SearxRobotLayer():
stderr=subprocess.STDOUT
)
if hasattr(self.server.stdout, 'read1'):
- print(self.server.stdout.read1(1024).decode('utf-8'))
+ print(self.server.stdout.read1(1024).decode())
def tearDown(self):
os.kill(self.server.pid, 9)
diff --git a/searx/url_utils.py b/searx/url_utils.py
deleted file mode 100644
index dcafc3ba8..000000000
--- a/searx/url_utils.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from sys import version_info
-
-if version_info[0] == 2:
- from urllib import quote, quote_plus, unquote, urlencode
- from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
-else:
- from urllib.parse import (
- parse_qs,
- parse_qsl,
- quote,
- quote_plus,
- unquote,
- urlencode,
- urljoin,
- urlparse,
- urlunparse,
- ParseResult
- )
-
-
-__export__ = (parse_qs,
- parse_qsl,
- quote,
- quote_plus,
- unquote,
- urlencode,
- urljoin,
- urlparse,
- urlunparse,
- ParseResult)
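Deleting the shim is safe because its entire surface maps one-to-one onto urllib.parse on Python 3; every `from searx.url_utils import ...` in this patch becomes the direct import. For reference:

from urllib.parse import (
    parse_qs, parse_qsl, quote, quote_plus, unquote,
    urlencode, urljoin, urlparse, urlunparse, ParseResult,
)

print(urljoin('https://example.org/a/', '../b'))  # https://example.org/b
print(quote_plus('a b&c'))                        # a+b%26c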
diff --git a/searx/utils.py b/searx/utils.py
index 5ea9dc89c..d8842c65f 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
+import os
+import sys
import csv
import hashlib
import hmac
-import os
import re
+import json
-from babel.core import get_global
-from babel.dates import format_date
from codecs import getincrementalencoder
from imp import load_source
from numbers import Number
from os.path import splitext, join
-from io import open
+from io import open, StringIO
from random import choice
+from html.parser import HTMLParser
from lxml.etree import XPath
-import sys
-import json
+from babel.core import get_global
+from babel.dates import format_date
from searx import settings
from searx.version import VERSION_STRING
@@ -23,23 +24,6 @@ from searx.languages import language_codes
from searx import settings
from searx import logger
-try:
- from cStringIO import StringIO
-except:
- from io import StringIO
-
-try:
- from HTMLParser import HTMLParser
-except:
- from html.parser import HTMLParser
-
-if sys.version_info[0] == 3:
- unichr = chr
- unicode = str
- IS_PY2 = False
- basestring = str
-else:
- IS_PY2 = True
logger = logger.getChild('utils')
@@ -75,19 +59,18 @@ def highlight_content(content, query):
if content.find('<') != -1:
return content
- query = query.decode('utf-8')
if content.lower().find(query.lower()) > -1:
- query_regex = u'({0})'.format(re.escape(query))
+ query_regex = '({0})'.format(re.escape(query))
content = re.sub(query_regex, '<span class="highlight">\\1</span>',
content, flags=re.I | re.U)
else:
regex_parts = []
for chunk in query.split():
if len(chunk) == 1:
- regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
+ regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
else:
- regex_parts.append(u'{0}'.format(re.escape(chunk)))
- query_regex = u'({0})'.format('|'.join(regex_parts))
+ regex_parts.append('{0}'.format(re.escape(chunk)))
+ query_regex = '({0})'.format('|'.join(regex_parts))
content = re.sub(query_regex, '<span class="highlight">\\1</span>',
content, flags=re.I | re.U)
@@ -124,21 +107,21 @@ class HTMLTextExtractor(HTMLParser):
def handle_charref(self, number):
if not self.is_valid_tag():
return
- if number[0] in (u'x', u'X'):
+ if number[0] in ('x', 'X'):
codepoint = int(number[1:], 16)
else:
codepoint = int(number)
- self.result.append(unichr(codepoint))
+ self.result.append(chr(codepoint))
def handle_entityref(self, name):
if not self.is_valid_tag():
return
# codepoint = htmlentitydefs.name2codepoint[name]
- # self.result.append(unichr(codepoint))
+ # self.result.append(chr(codepoint))
self.result.append(name)
def get_text(self):
- return u''.join(self.result).strip()
+ return ''.join(self.result).strip()
def html_to_text(html):
@@ -163,22 +146,14 @@ class UnicodeWriter:
self.encoder = getincrementalencoder(encoding)()
def writerow(self, row):
- if IS_PY2:
- row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
self.writer.writerow(row)
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
- if IS_PY2:
- data = data.decode("utf-8")
- else:
- data = data.strip('\x00')
+ data = data.strip('\x00')
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
- if IS_PY2:
- self.stream.write(data)
- else:
- self.stream.write(data.decode("utf-8"))
+ self.stream.write(data.decode())
# empty queue
self.queue.truncate(0)
@@ -253,7 +228,7 @@ def dict_subset(d, properties):
def prettify_url(url, max_length=74):
if len(url) > max_length:
chunk_len = int(max_length / 2 + 1)
- return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
+ return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
else:
return url
@@ -309,8 +284,10 @@ def int_or_zero(num):
def is_valid_lang(lang):
+ if isinstance(lang, bytes):
+ lang = lang.decode()
is_abbr = (len(lang) == 2)
- lang = lang.lower().decode('utf-8')
+ lang = lang.lower()
if is_abbr:
for l in language_codes:
if l[0][:2] == lang:
@@ -407,17 +384,14 @@ def new_hmac(secret_key, url):
secret_key_bytes = secret_key
else:
raise err
- if sys.version_info[0] == 2:
- return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
- else:
- return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
+ return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
def to_string(obj):
- if isinstance(obj, basestring):
+ if isinstance(obj, str):
return obj
if isinstance(obj, Number):
- return unicode(obj)
+ return str(obj)
if hasattr(obj, '__str__'):
return obj.__str__()
if hasattr(obj, '__repr__'):
@@ -433,9 +407,9 @@ def ecma_unescape(s):
"""
# s = unicode(s)
# "%u5409" becomes "吉"
- s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+ s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
# "%20" becomes " ", "%F3" becomes "ó"
- s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+ s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
return s
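chr() covers the full Unicode range in Python 3, replacing unichr. A standalone sketch of ecma_unescape (the two regexes are reconstructed assumptions matching the substitutions above, not copied from the module):

import re

ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})')
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})')

def ecma_unescape(s):
    # "%u5409" becomes "吉"
    s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
    # "%20" becomes " ", "%F3" becomes "ó"
    s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
    return s

print(ecma_unescape('%u5409%20ok'))  # prints: 吉 ok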
diff --git a/searx/webapp.py b/searx/webapp.py
index 4c0eceaaf..a1b0413aa 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''
+import sys
+if sys.version_info[0] < 3:
+ print('\033[1;31m Python2 is no longer supported\033[0m')
+ exit(1)
+
if __name__ == '__main__':
- from sys import path
from os.path import realpath, dirname
- path.append(realpath(dirname(realpath(__file__)) + '/../'))
+ sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
import hashlib
import hmac
import json
import os
-import sys
import requests
from searx import logger
logger = logger.getChild('webapp')
-try:
- from pygments import highlight
- from pygments.lexers import get_lexer_by_name
- from pygments.formatters import HtmlFormatter
-except:
- logger.critical("cannot import dependency: pygments")
- from sys import exit
- exit(1)
-try:
- from cgi import escape
-except:
- from html import escape
-from six import next
from datetime import datetime, timedelta
from time import time
+from html import escape
+from io import StringIO
+from urllib.parse import urlencode, urlparse, urljoin
+
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter
+
from werkzeug.middleware.proxy_fix import ProxyFix
from flask import (
Flask, request, render_template, url_for, Response, make_response,
@@ -78,7 +76,6 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers
-from searx.url_utils import urlencode, urlparse, urljoin
from searx.utils import new_hmac
# check if the pyopenssl package is installed.
@@ -89,19 +86,6 @@ except ImportError:
logger.critical("The pyopenssl package has to be installed.\n"
"Some HTTPS connections will fail")
-try:
- from cStringIO import StringIO
-except:
- from io import StringIO
-
-
-if sys.version_info[0] == 3:
- unicode = str
- PY3 = True
-else:
- logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
- exit(1)
-
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -315,11 +299,11 @@ def proxify(url):
if not settings.get('result_proxy'):
return url
- url_params = dict(mortyurl=url.encode('utf-8'))
+ url_params = dict(mortyurl=url.encode())
if settings['result_proxy'].get('key'):
url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'],
- url.encode('utf-8'),
+ url.encode(),
hashlib.sha256).hexdigest()
return '{0}?{1}'.format(settings['result_proxy']['url'],
@@ -347,10 +331,10 @@ def image_proxify(url):
if settings.get('result_proxy'):
return proxify(url)
- h = new_hmac(settings['server']['secret_key'], url.encode('utf-8'))
+ h = new_hmac(settings['server']['secret_key'], url.encode())
return '{0}?{1}'.format(url_for('image_proxy'),
- urlencode(dict(url=url.encode('utf-8'), h=h)))
+ urlencode(dict(url=url.encode(), h=h)))
def render(template_name, override_theme=None, **kwargs):
@@ -424,8 +408,6 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
- kwargs['unicode'] = unicode
-
kwargs['preferences'] = request.preferences
kwargs['brand'] = brand
@@ -612,7 +594,7 @@ def index():
if 'content' in result and result['content']:
result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
if 'title' in result and result['title']:
- result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
+ result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
else:
if result.get('content'):
result['content'] = html_to_text(result['content']).strip()
@@ -634,14 +616,14 @@ def index():
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
- result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
+ result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
else:
- result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
+ result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
else:
result['publishedDate'] = format_date(result['publishedDate'])
if output_format == 'json':
- return Response(json.dumps({'query': search_query.query.decode('utf-8'),
+ return Response(json.dumps({'query': search_query.query,
'number_of_results': number_of_results,
'results': results,
'answers': list(result_container.answers),
@@ -670,7 +652,7 @@ def index():
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
- cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
+ cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
response.headers.add('Content-Disposition', cont_disp)
return response
@@ -754,10 +736,7 @@ def autocompleter():
disabled_engines = request.preferences.engines.get_disabled()
# parse query
- if PY3:
- raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
- else:
- raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
+ raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines)
raw_text_query.parse_query()
# check if search query is set
@@ -879,7 +858,7 @@ def _is_selected_language_supported(engine, preferences):
@app.route('/image_proxy', methods=['GET'])
def image_proxy():
- url = request.args.get('url').encode('utf-8')
+ url = request.args.get('url').encode()
if not url:
return '', 400
@@ -1061,7 +1040,7 @@ def run():
)
-class ReverseProxyPathFix(object):
+class ReverseProxyPathFix:
'''Wrap the application in this middleware and configure the
front-end server to add these headers, to let you quietly bind
this to a URL other than / and to an HTTP scheme that is
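The runtime guard moved to the very top of webapp.py so a Python 2 interpreter fails immediately instead of dying on a later syntax or import error. The pattern, isolated:

import sys

if sys.version_info[0] < 3:
    # \033[1;31m ... \033[0m renders the message in bold red on ANSI terminals
    print('\033[1;31m Python2 is no longer supported\033[0m')
    sys.exit(1)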
diff --git a/tests/unit/test_answerers.py b/tests/unit/test_answerers.py
index bd8789a7e..73d8d26f2 100644
--- a/tests/unit/test_answerers.py
+++ b/tests/unit/test_answerers.py
@@ -10,7 +10,7 @@ class AnswererTest(SearxTestCase):
def test_unicode_input(self):
query = Mock()
- unicode_payload = u'árvíztűrő tükörfúrógép'
+ unicode_payload = 'árvíztűrő tükörfúrógép'
for answerer in answerers:
- query.query = u'{} {}'.format(answerer.keywords[0], unicode_payload)
+ query.query = '{} {}'.format(answerer.keywords[0], unicode_payload)
self.assertTrue(isinstance(answerer.answer(query), list))
diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py
index 10de8475a..838c1d574 100644
--- a/tests/unit/test_plugins.py
+++ b/tests/unit/test_plugins.py
@@ -48,11 +48,11 @@ class SelfIPTest(SearxTestCase):
# IP test
request = Mock(remote_addr='127.0.0.1')
request.headers.getlist.return_value = []
- search = get_search_mock(query=b'ip', pageno=1)
+ search = get_search_mock(query='ip', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"])
- search = get_search_mock(query=b'ip', pageno=2)
+ search = get_search_mock(query='ip', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('ip' in search.result_container.answers)
@@ -60,26 +60,26 @@ class SelfIPTest(SearxTestCase):
request = Mock(user_agent='Mock')
request.headers.getlist.return_value = []
- search = get_search_mock(query=b'user-agent', pageno=1)
+ search = get_search_mock(query='user-agent', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"])
- search = get_search_mock(query=b'user-agent', pageno=2)
+ search = get_search_mock(query='user-agent', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('user-agent' in search.result_container.answers)
- search = get_search_mock(query=b'user-agent', pageno=1)
+ search = get_search_mock(query='user-agent', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"])
- search = get_search_mock(query=b'user-agent', pageno=2)
+ search = get_search_mock(query='user-agent', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('user-agent' in search.result_container.answers)
- search = get_search_mock(query=b'What is my User-Agent?', pageno=1)
+ search = get_search_mock(query='What is my User-Agent?', pageno=1)
store.call(store.plugins, 'post_search', request, search)
self.assertTrue('Mock' in search.result_container.answers["user-agent"]["answer"])
- search = get_search_mock(query=b'What is my User-Agent?', pageno=2)
+ search = get_search_mock(query='What is my User-Agent?', pageno=2)
store.call(store.plugins, 'post_search', request, search)
self.assertFalse('user-agent' in search.result_container.answers)
diff --git a/tests/unit/test_preferences.py b/tests/unit/test_preferences.py
index 61ac0e8e4..32f50c60b 100644
--- a/tests/unit/test_preferences.py
+++ b/tests/unit/test_preferences.py
@@ -3,7 +3,7 @@ from searx.preferences import (EnumStringSetting, MapSetting, MissingArgumentExc
from searx.testing import SearxTestCase
-class PluginStub(object):
+class PluginStub:
def __init__(self, id, default_on):
self.id = id
@@ -28,13 +28,13 @@ class TestSettings(SearxTestCase):
def test_map_setting_valid_default(self):
setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3})
- self.assertEquals(setting.get_value(), 3)
+ self.assertEqual(setting.get_value(), 3)
def test_map_setting_valid_choice(self):
setting = MapSetting(3, map={'dog': 1, 'bat': 2, 'cat': 3})
- self.assertEquals(setting.get_value(), 3)
+ self.assertEqual(setting.get_value(), 3)
setting.parse('bat')
- self.assertEquals(setting.get_value(), 2)
+ self.assertEqual(setting.get_value(), 2)
def test_enum_setting_invalid_initialization(self):
with self.assertRaises(MissingArgumentException):
@@ -56,13 +56,13 @@ class TestSettings(SearxTestCase):
def test_enum_setting_valid_default(self):
setting = EnumStringSetting(3, choices=[1, 2, 3])
- self.assertEquals(setting.get_value(), 3)
+ self.assertEqual(setting.get_value(), 3)
def test_enum_setting_valid_choice(self):
setting = EnumStringSetting(3, choices=[1, 2, 3])
- self.assertEquals(setting.get_value(), 3)
+ self.assertEqual(setting.get_value(), 3)
setting.parse(2)
- self.assertEquals(setting.get_value(), 2)
+ self.assertEqual(setting.get_value(), 2)
# multiple choice settings
def test_multiple_setting_invalid_initialization(self):
@@ -80,48 +80,48 @@ class TestSettings(SearxTestCase):
def test_multiple_setting_valid_default(self):
setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3'])
- self.assertEquals(setting.get_value(), ['3'])
+ self.assertEqual(setting.get_value(), ['3'])
def test_multiple_setting_valid_choice(self):
setting = MultipleChoiceSetting(['3'], choices=['1', '2', '3'])
- self.assertEquals(setting.get_value(), ['3'])
+ self.assertEqual(setting.get_value(), ['3'])
setting.parse('2')
- self.assertEquals(setting.get_value(), ['2'])
+ self.assertEqual(setting.get_value(), ['2'])
# search language settings
def test_lang_setting_valid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('de')
- self.assertEquals(setting.get_value(), 'de')
+ self.assertEqual(setting.get_value(), 'de')
def test_lang_setting_invalid_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'de', 'en'])
setting.parse('xx')
- self.assertEquals(setting.get_value(), 'all')
+ self.assertEqual(setting.get_value(), 'all')
def test_lang_setting_old_cookie_choice(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_XA')
- self.assertEquals(setting.get_value(), 'es')
+ self.assertEqual(setting.get_value(), 'es')
def test_lang_setting_old_cookie_format(self):
setting = SearchLanguageSetting('all', choices=['all', 'es', 'es-ES'])
setting.parse('es_ES')
- self.assertEquals(setting.get_value(), 'es-ES')
+ self.assertEqual(setting.get_value(), 'es-ES')
# plugins settings
def test_plugins_setting_all_default_enabled(self):
plugin1 = PluginStub('plugin1', True)
plugin2 = PluginStub('plugin2', True)
setting = PluginsSetting(['3'], choices=[plugin1, plugin2])
- self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin2']))
+ self.assertEqual(setting.get_enabled(), set(['plugin1', 'plugin2']))
def test_plugins_setting_few_default_enabled(self):
plugin1 = PluginStub('plugin1', True)
plugin2 = PluginStub('plugin2', False)
plugin3 = PluginStub('plugin3', True)
setting = PluginsSetting('name', choices=[plugin1, plugin2, plugin3])
- self.assertEquals(setting.get_enabled(), set(['plugin1', 'plugin3']))
+ self.assertEqual(setting.get_enabled(), set(['plugin1', 'plugin3']))
class TestPreferences(SearxTestCase):
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index e4c0bdeed..86bf445e3 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -9,9 +9,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(query_text, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), query_text)
- self.assertEquals(len(query.query_parts), 1)
- self.assertEquals(len(query.languages), 0)
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 1)
+ self.assertEqual(len(query.languages), 0)
self.assertFalse(query.specific)
def test_language_code(self):
@@ -21,9 +21,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(full_query, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), full_query)
- self.assertEquals(len(query.query_parts), 3)
- self.assertEquals(len(query.languages), 1)
+ self.assertEqual(query.getFullQuery(), full_query)
+ self.assertEqual(len(query.query_parts), 3)
+ self.assertEqual(len(query.languages), 1)
self.assertIn(language, query.languages)
self.assertFalse(query.specific)
@@ -34,8 +34,8 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(full_query, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), full_query)
- self.assertEquals(len(query.query_parts), 3)
+ self.assertEqual(query.getFullQuery(), full_query)
+ self.assertEqual(len(query.query_parts), 3)
self.assertIn('en', query.languages)
self.assertFalse(query.specific)
@@ -46,8 +46,8 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(full_query, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), full_query)
- self.assertEquals(len(query.query_parts), 3)
+ self.assertEqual(query.getFullQuery(), full_query)
+ self.assertEqual(len(query.query_parts), 3)
self.assertIn('all', query.languages)
self.assertFalse(query.specific)
@@ -58,9 +58,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(full_query, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), full_query)
- self.assertEquals(len(query.query_parts), 1)
- self.assertEquals(len(query.languages), 0)
+ self.assertEqual(query.getFullQuery(), full_query)
+ self.assertEqual(len(query.query_parts), 1)
+ self.assertEqual(len(query.languages), 0)
self.assertFalse(query.specific)
def test_timeout_below100(self):
@@ -68,9 +68,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(query_text, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), query_text)
- self.assertEquals(len(query.query_parts), 3)
- self.assertEquals(query.timeout_limit, 3)
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 3)
+ self.assertEqual(query.timeout_limit, 3)
self.assertFalse(query.specific)
def test_timeout_above100(self):
@@ -78,9 +78,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(query_text, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), query_text)
- self.assertEquals(len(query.query_parts), 3)
- self.assertEquals(query.timeout_limit, 0.35)
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 3)
+ self.assertEqual(query.timeout_limit, 0.35)
self.assertFalse(query.specific)
def test_timeout_above1000(self):
@@ -88,9 +88,9 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(query_text, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), query_text)
- self.assertEquals(len(query.query_parts), 3)
- self.assertEquals(query.timeout_limit, 3.5)
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 3)
+ self.assertEqual(query.timeout_limit, 3.5)
self.assertFalse(query.specific)
def test_timeout_invalid(self):
@@ -99,8 +99,8 @@ class TestQuery(SearxTestCase):
query = RawTextQuery(query_text, [])
query.parse_query()
- self.assertEquals(query.getFullQuery(), query_text)
- self.assertEquals(len(query.query_parts), 1)
- self.assertEquals(query.query_parts[0], query_text)
- self.assertEquals(query.timeout_limit, None)
+ self.assertEqual(query.getFullQuery(), query_text)
+ self.assertEqual(len(query.query_parts), 1)
+ self.assertEqual(query.query_parts[0], query_text)
+ self.assertEqual(query.timeout_limit, None)
self.assertFalse(query.specific)
diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py
index ca9fe862a..a15d2c899 100644
--- a/tests/unit/test_search.py
+++ b/tests/unit/test_search.py
@@ -45,7 +45,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
- self.assertEquals(search.actual_timeout, 3.0)
+ self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_above_default_nomax(self):
searx.search.max_request_timeout = None
@@ -54,7 +54,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
- self.assertEquals(search.actual_timeout, 3.0)
+ self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_below_default_nomax(self):
searx.search.max_request_timeout = None
@@ -63,7 +63,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
- self.assertEquals(search.actual_timeout, 1.0)
+ self.assertEqual(search.actual_timeout, 1.0)
def test_timeout_query_below_max(self):
searx.search.max_request_timeout = 10.0
@@ -72,7 +72,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
- self.assertEquals(search.actual_timeout, 5.0)
+ self.assertEqual(search.actual_timeout, 5.0)
def test_timeout_query_above_max(self):
searx.search.max_request_timeout = 10.0
@@ -81,7 +81,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
search.search()
- self.assertEquals(search.actual_timeout, 10.0)
+ self.assertEqual(search.actual_timeout, 10.0)
def test_query_private_engine_without_token(self):
search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}],
@@ -89,7 +89,7 @@ class SearchTestCase(SearxTestCase):
preferences=Preferences(['oscar'], ['general'], engines, []))
search = searx.search.Search(search_query)
results = search.search()
- self.assertEquals(results.results_length(), 0)
+ self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_incorrect_token(self):
preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
@@ -99,7 +99,7 @@ class SearchTestCase(SearxTestCase):
preferences=preferences_with_tokens)
search = searx.search.Search(search_query)
results = search.search()
- self.assertEquals(results.results_length(), 0)
+ self.assertEqual(results.results_length(), 0)
def test_query_private_engine_with_correct_token(self):
preferences_with_tokens = Preferences(['oscar'], ['general'], engines, [])
@@ -109,7 +109,7 @@ class SearchTestCase(SearxTestCase):
preferences=preferences_with_tokens)
search = searx.search.Search(search_query)
results = search.search()
- self.assertEquals(results.results_length(), 1)
+ self.assertEqual(results.results_length(), 1)
def test_external_bang(self):
search_query = searx.query.SearchQuery('yes yes',
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index b09b9d414..5f98511c3 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -1,12 +1,8 @@
# -*- coding: utf-8 -*-
import mock
-import sys
from searx.testing import SearxTestCase
from searx import utils
-if sys.version_info[0] == 3:
- unicode = str
-
class TestUtils(SearxTestCase):
@@ -34,9 +30,9 @@ class TestUtils(SearxTestCase):
self.assertEqual(utils.highlight_content(content, None), content)
content = 'a'
- query = b'test'
+ query = 'test'
self.assertEqual(utils.highlight_content(content, query), content)
- query = b'a test'
+ query = 'a test'
self.assertEqual(utils.highlight_content(content, query), content)
def test_html_to_text(self):
@@ -52,15 +48,15 @@ class TestUtils(SearxTestCase):
</span>
</a>
"""
- self.assertIsInstance(utils.html_to_text(html), unicode)
+ self.assertIsInstance(utils.html_to_text(html), str)
self.assertIsNotNone(utils.html_to_text(html))
self.assertEqual(utils.html_to_text(html), "Test text")
def test_prettify_url(self):
data = (('https://searx.me/', 'https://searx.me/'),
- (u'https://searx.me/ű', u'https://searx.me/ű'),
+ ('https://searx.me/ű', 'https://searx.me/ű'),
('https://searx.me/' + (100 * 'a'), 'https://searx.me/[...]aaaaaaaaaaaaaaaaa'),
- (u'https://searx.me/' + (100 * u'ű'), u'https://searx.me/[...]űűűűűűűűűűűűűűűűű'))
+ ('https://searx.me/' + (100 * 'ű'), 'https://searx.me/[...]űűűűűűűűűűűűűűűűű'))
for test_url, expected in data:
self.assertEqual(utils.prettify_url(test_url, max_length=32), expected)
@@ -108,12 +104,12 @@ class TestHTMLTextExtractor(SearxTestCase):
def test_handle_charref(self):
self.html_text_extractor.handle_charref('xF')
- self.assertIn(u'\x0f', self.html_text_extractor.result)
+ self.assertIn('\x0f', self.html_text_extractor.result)
self.html_text_extractor.handle_charref('XF')
- self.assertIn(u'\x0f', self.html_text_extractor.result)
+ self.assertIn('\x0f', self.html_text_extractor.result)
self.html_text_extractor.handle_charref('97')
- self.assertIn(u'a', self.html_text_extractor.result)
+ self.assertIn('a', self.html_text_extractor.result)
def test_handle_entityref(self):
entity = 'test'
diff --git a/tests/unit/test_webapp.py b/tests/unit/test_webapp.py
index 8eed607e3..7dd465898 100644
--- a/tests/unit/test_webapp.py
+++ b/tests/unit/test_webapp.py
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
import json
+from urllib.parse import ParseResult
from mock import Mock
from searx import webapp
from searx.testing import SearxTestCase
from searx.search import Search
-from searx.url_utils import ParseResult
class ViewsTestCase(SearxTestCase):
@@ -89,7 +89,7 @@ class ViewsTestCase(SearxTestCase):
def test_index_json(self):
result = self.app.post('/', data={'q': 'test', 'format': 'json'})
- result_dict = json.loads(result.data.decode('utf-8'))
+ result_dict = json.loads(result.data.decode())
self.assertEqual('test', result_dict['query'])
self.assertEqual(len(result_dict['results']), 2)
diff --git a/utils/fabfile.py b/utils/fabfile.py
index 559e2ab6c..93f7fc536 100644
--- a/utils/fabfile.py
+++ b/utils/fabfile.py
@@ -1,5 +1,5 @@
from fabric.api import cd, run, sudo, put
-from cStringIO import StringIO
+from io import StringIO
base_dir = '/usr/local'
diff --git a/utils/fetch_currencies.py b/utils/fetch_currencies.py
index 5605fb387..437c375db 100644
--- a/utils/fetch_currencies.py
+++ b/utils/fetch_currencies.py
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
-from __future__ import print_function
+
import json
import re
import unicodedata
import string
-from urllib import urlencode
+from urllib.parse import urlencode
from requests import get
languages = {'de', 'en', 'es', 'fr', 'hu', 'it', 'nl', 'jp'}
@@ -39,7 +39,7 @@ def add_currency_name(name, iso4217):
db_names = db['names']
- if not isinstance(iso4217, basestring):
+ if not isinstance(iso4217, str):
print("problem", name, iso4217)
return
@@ -126,7 +126,7 @@ def wdq_query(query):
url = url_wmflabs_template + query
htmlresponse = get(url)
jsonresponse = json.loads(htmlresponse.content)
- qlist = map(add_q, jsonresponse.get('items', {}))
+ qlist = list(map(add_q, jsonresponse.get('items', {})))
error = jsonresponse.get('status', {}).get('error', None)
if error is not None and error != 'OK':
print("error for query '" + query + "' :" + error)
@@ -150,12 +150,12 @@ for q in wmflabs_queries:
wdq_query(q)
# static
-add_currency_name(u"euro", 'EUR')
-add_currency_name(u"euros", 'EUR')
-add_currency_name(u"dollar", 'USD')
-add_currency_name(u"dollars", 'USD')
-add_currency_name(u"peso", 'MXN')
-add_currency_name(u"pesos", 'MXN')
+add_currency_name("euro", 'EUR')
+add_currency_name("euros", 'EUR')
+add_currency_name("dollar", 'USD')
+add_currency_name("dollars", 'USD')
+add_currency_name("peso", 'MXN')
+add_currency_name("pesos", 'MXN')
# write
f = open("currencies.json", "wb")
diff --git a/utils/fetch_firefox_version.py b/utils/fetch_firefox_version.py
index 722c48229..997a752b3 100755
--- a/utils/fetch_firefox_version.py
+++ b/utils/fetch_firefox_version.py
@@ -9,9 +9,9 @@ path.append(realpath(dirname(realpath(__file__)) + '/../'))
import json
import requests
import re
+from urllib.parse import urlparse, urljoin
from distutils.version import LooseVersion, StrictVersion
from lxml import html
-from searx.url_utils import urlparse, urljoin
from searx import searx_dir
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
diff --git a/utils/fetch_languages.py b/utils/fetch_languages.py
index 77ec0bf20..0fb9d9c18 100644
--- a/utils/fetch_languages.py
+++ b/utils/fetch_languages.py
@@ -174,14 +174,17 @@ def write_languages_file(languages):
+ '# this file is generated automatically by utils/update_search_languages.py\n'\
+ '\nlanguage_codes = ('
for code in sorted(languages):
- file_content += '\n (u"' + code + '"'\
- + ', u"' + languages[code]['name'].split(' (')[0] + '"'\
- + ', u"' + languages[code].get('country', '') + '"'\
- + ', u"' + languages[code].get('english_name', '').split(' (')[0] + '"),'
+ if 'name' in languages[code]:
+ file_content += '\n ("' + code + '"'\
+ + ', "' + languages[code]['name'].split(' (')[0] + '"'\
+ + ', "' + languages[code].get('country', '') + '"'\
+ + ', "' + languages[code].get('english_name', '').split(' (')[0] + '"),'
+ else:
+ print('ignore', languages[code])
# remove last comma
file_content = file_content[:-1]
file_content += '\n)\n'
- new_file.write(file_content.encode('utf8'))
+ new_file.write(file_content.encode())
new_file.close()
diff --git a/utils/makefile.python b/utils/makefile.python
index df16acbbf..6c6696964 100644
--- a/utils/makefile.python
+++ b/utils/makefile.python
@@ -69,11 +69,11 @@ python-help::
@echo ' py[un]install - [un]install python objects in editable mode'
@echo ' upload-pypi - upload $(PYDIST)/* files to PyPi'
@echo 'options:'
- @echo ' make PY=2 [targets] => to eval targets with python 2 ($(PY))'
- @echo ' make PIP_INST= => to set/unset pip install options ($(PIP_INST))'
- @echo ' make TEST=. => choose test from $(TEST_FOLDER) (default "." runs all)'
- @echo ' make DEBUG= => target "debug": do not invoke PDB on errors'
- @echo ' make PY_SETUP_EXTRAS => also install extras_require from setup.py \[develop,test\]'
+ @echo ' make PY=3.7 [targets] => to eval targets with python 3.7 ($(PY))'
+ @echo ' make PIP_INST= => to set/unset pip install options ($(PIP_INST))'
+ @echo ' make TEST=. => choose test from $(TEST_FOLDER) (default "." runs all)'
+ @echo ' make DEBUG= => target "debug": do not invoke PDB on errors'
+ @echo ' make PY_SETUP_EXTRAS => also install extras_require from setup.py \[develop,test\]'
@echo ' when using target "pydebug", set breakpoints within py-source by adding::'
@echo ' DEBUG()'
diff --git a/utils/standalone_searx.py b/utils/standalone_searx.py
index 7bc1d32ed..d43b474d7 100755
--- a/utils/standalone_searx.py
+++ b/utils/standalone_searx.py
@@ -56,7 +56,7 @@ args = parser.parse_args()
# search results for the query
form = {
"q":args.query,
- "categories":args.category.decode('utf-8'),
+ "categories":args.category.decode(),
"pageno":str(args.pageno),
"language":args.lang,
"time_range":args.timerange
@@ -101,4 +101,3 @@ result_container_json = {
}
sys.stdout = codecs.getwriter("UTF-8")(sys.stdout)
sys.stdout.write(dumps(result_container_json, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial))
-