diff options
author | Dalf <alex@al-f.net> | 2020-08-06 17:42:46 +0200 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2020-09-10 10:39:04 +0200 |
commit | 1022228d950c2a809ed613df1a515d9a6cafda7c (patch) | |
tree | d792dddea1a5b278b018ed4e024cd13340d5c1b1 /searx/engines | |
parent | 272158944bf13503e2597018fc60a00baddec660 (diff) | |
download | searxng-1022228d950c2a809ed613df1a515d9a6cafda7c.tar.gz searxng-1022228d950c2a809ed613df1a515d9a6cafda7c.zip |
Drop Python 2 (1/n): remove unicode string and url_utils
Diffstat (limited to 'searx/engines')
79 files changed, 136 insertions, 157 deletions
diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index 0de04bd95..76a7a1634 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -1,7 +1,8 @@ +from urllib.parse import quote, urljoin from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from searx.url_utils import quote, urljoin + url = 'https://1337x.to/' search_url = url + 'search/{search_term}/{pageno}/' diff --git a/searx/engines/acgsou.py b/searx/engines/acgsou.py index cca28f0db..d5d3e3178 100644 --- a/searx/engines/acgsou.py +++ b/searx/engines/acgsou.py @@ -9,9 +9,9 @@ @parse url, title, content, seed, leech, torrentfile """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config @@ -63,7 +63,7 @@ def response(resp): except: pass # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime - content = u'Category: "{category}".' + content = 'Category: "{category}".' 
content = content.format(category=category) results.append({'url': href, diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index f2ee12b29..4e6dcd486 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -9,9 +9,10 @@ @parse url, title, thumbnail_src """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode + # engine dependent config categories = ['it'] diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index dce862f55..9e13dc42e 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -11,9 +11,9 @@ @parse url, title """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index e3c871d17..851f30bfc 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -11,9 +11,9 @@ More info on api: https://arxiv.org/help/api/user-manual """ +from urllib.parse import urlencode from lxml import html from datetime import datetime -from searx.url_utils import urlencode categories = ['science'] @@ -30,7 +30,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=query.decode('utf-8'), + string_args = dict(query=query.decode(), offset=offset, number_of_results=number_of_results) diff --git a/searx/engines/base.py b/searx/engines/base.py index f1b1cf671..34b735b3c 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -13,10 +13,10 @@ More info on api: http://base-search.net/about/download/base_interface.pdf """ +from urllib.parse import urlencode from lxml import etree from datetime import datetime import re -from searx.url_utils import urlencode from searx.utils import searx_useragent diff --git 
a/searx/engines/bing.py b/searx/engines/bing.py index afb776acd..eda3d42a3 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -14,10 +14,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from searx import logger, utils from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import match_language, gen_useragent, eval_xpath logger = logger.getChild('bing engine') @@ -47,7 +47,7 @@ def request(query, params): else: lang = match_language(params['language'], supported_languages, language_aliases) - query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8') + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode() search_path = search_string.format( query=urlencode({'q': query}), diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 93b25008c..10da42b5c 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -12,10 +12,10 @@ """ +from urllib.parse import urlencode from lxml import html from json import loads import re -from searx.url_utils import urlencode from searx.utils import match_language from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases @@ -91,7 +91,7 @@ def response(resp): # strip 'Unicode private use area' highlighting, they render to Tux # the Linux penguin and a standing diamond on my machine... 
- title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '') + title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') results.append({'template': 'images.html', 'url': m['purl'], 'thumbnail_src': m['turl'], diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index d13be777c..fbe51faed 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -13,10 +13,9 @@ from datetime import datetime from dateutil import parser +from urllib.parse import urlencode, urlparse, parse_qsl from lxml import etree from searx.utils import list_get, match_language -from searx.url_utils import urlencode, urlparse, parse_qsl - from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases # engine dependent config diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index f048f0d8e..63264de6f 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -12,7 +12,7 @@ from json import loads from lxml import html -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import match_language from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 82eedc24b..2faade3e2 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -12,8 +12,8 @@ from lxml import html from operator import itemgetter +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text -from searx.url_utils import quote, urljoin from searx.utils import get_torrent_size # engine dependent config diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 8eab8f673..7281b7175 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -1,14 +1,11 @@ import json import re import os -import sys import unicodedata from io import open from datetime import 
datetime -if sys.version_info[0] == 3: - unicode = str categories = [] url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' @@ -20,7 +17,7 @@ db = 1 def normalize_name(name): - name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') + name = name.decode().lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 1038e64bf..1e24e41da 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -14,7 +14,7 @@ from json import loads from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import match_language, html_to_text # engine dependent config diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index af63478fb..48c0429a7 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['music'] @@ -50,7 +50,7 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = u'{} - {} - {}'.format( + content = '{} - {} - {}'.format( result['artist']['name'], result['album']['title'], result['title']) diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index a0e27e622..2bd21fa5d 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -14,8 +14,9 @@ from lxml import html import re +from urllib.parse import urlencode from searx.engines.xpath import extract_text -from searx.url_utils import urlencode + # engine dependent config categories = ['images'] diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 423af0971..1d8470c17 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -10,12 +10,12 @@ """ import re +from urllib.parse import urljoin from lxml import 
html from searx.utils import is_valid_lang, eval_xpath -from searx.url_utils import urljoin categories = ['general'] -url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' weight = 100 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) @@ -37,7 +37,7 @@ def request(query, params): params['url'] = url.format(from_lang=from_lang[2], to_lang=to_lang[2], - query=query.decode('utf-8')) + query=query.decode()) return params diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index ff2f94593..e2c0389c6 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -10,14 +10,11 @@ @parse url, title, content, magnetlink """ -from sys import version_info +from urllib.parse import urljoin from lxml import html from searx.engines.xpath import extract_text from searx.utils import get_torrent_size -from searx.url_utils import urljoin -if version_info[0] == 3: - unicode = str categories = ['videos', 'music', 'files'] paging = True diff --git a/searx/engines/digg.py b/searx/engines/digg.py index 073410eb0..24a932d53 100644 --- a/searx/engines/digg.py +++ b/searx/engines/digg.py @@ -14,8 +14,8 @@ import random import string from dateutil import parser from json import loads +from urllib.parse import urlencode from lxml import html -from searx.url_utils import urlencode from datetime import datetime # engine dependent config diff --git a/searx/engines/doku.py b/searx/engines/doku.py index d20e66026..513ffda89 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -9,10 +9,10 @@ # @stable yes # @parse (general) url, title, content +from urllib.parse import urlencode from lxml.html import fromstring from searx.engines.xpath import extract_text from searx.utils import eval_xpath -from searx.url_utils import urlencode # engine dependent config categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' diff --git a/searx/engines/duckduckgo.py 
b/searx/engines/duckduckgo.py index 6e07b5021..fb1ea2b2d 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,9 +15,9 @@ from lxml.html import fromstring from json import loads +from urllib.parse import urlencode from searx.engines.xpath import extract_text from searx.poolrequests import get -from searx.url_utils import urlencode from searx.utils import match_language, eval_xpath # engine dependent config diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 79d10c303..73154a525 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,11 +10,11 @@ DuckDuckGo (definitions) """ import json +from urllib.parse import urlencode from lxml import html from re import compile from searx.engines.xpath import extract_text from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases -from searx.url_utils import urlencode from searx.utils import html_to_text, match_language url = 'https://api.duckduckgo.com/'\ diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 89924b71c..38e141f8b 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -14,13 +14,13 @@ """ from json import loads +from urllib.parse import urlencode from searx.engines.xpath import extract_text from searx.engines.duckduckgo import ( _fetch_supported_languages, supported_languages_url, get_region_code, language_aliases ) from searx.poolrequests import get -from searx.url_utils import urlencode # engine dependent config categories = ['images'] diff --git a/searx/engines/duden.py b/searx/engines/duden.py index cf2f1a278..a711f422e 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -10,9 +10,9 @@ from lxml import html, etree import re +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text from searx.utils import eval_xpath -from 
searx.url_utils import quote, urljoin from searx import logger categories = ['general'] diff --git a/searx/engines/etools.py b/searx/engines/etools.py index a9eb0980d..efc102ef6 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -10,8 +10,8 @@ """ from lxml import html +from urllib.parse import quote from searx.engines.xpath import extract_text -from searx.url_utils import quote from searx.utils import eval_xpath categories = ['general'] diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 4066dc716..a2a5114df 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -9,9 +9,9 @@ @parse url, title, content """ +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode # engine dependent config categories = ['files'] diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py index ed57a6bf3..eef5be6e8 100644 --- a/searx/engines/filecrop.py +++ b/searx/engines/filecrop.py @@ -1,9 +1,6 @@ -from searx.url_utils import urlencode +from html.parser import HTMLParser +from urllib.parse import urlencode -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser url = 'http://www.filecrop.com/' search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index de1769370..b23c447b8 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -14,7 +14,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode categories = ['images'] diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 1cbb3e0a9..4bcf837cb 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -15,8 +15,8 @@ from json import loads from time import time import re +from urllib.parse import 
urlencode from searx.engines import logger -from searx.url_utils import urlencode from searx.utils import ecma_unescape, html_to_text logger = logger.getChild('flickr-noapi') @@ -117,10 +117,10 @@ def response(resp): 'img_format': img_format, 'template': 'images.html' } - result['author'] = author.encode('utf-8', 'ignore').decode('utf-8') - result['source'] = source.encode('utf-8', 'ignore').decode('utf-8') - result['title'] = title.encode('utf-8', 'ignore').decode('utf-8') - result['content'] = content.encode('utf-8', 'ignore').decode('utf-8') + result['author'] = author.encode(errors='ignore').decode() + result['source'] = source.encode(errors='ignore').decode() + result['title'] = title.encode(errors='ignore').decode() + result['content'] = content.encode(errors='ignore').decode() results.append(result) return results diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index f3441fa5f..14b659b5f 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -10,13 +10,10 @@ @parse url, title, content, thumbnail, img_src """ -try: - from cgi import escape -except: - from html import escape +from html import escape +from urllib.parse import urljoin, urlencode from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urljoin, urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index a67b42dbe..5b174a687 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,7 +10,7 @@ Frinkiac (Images) """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode categories = ['images'] diff --git a/searx/engines/genius.py b/searx/engines/genius.py index aa5afad9b..feb7d79d1 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -11,7 +11,7 @@ Genius """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode from datetime 
import datetime # engine dependent config diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index a7a966cc9..b4b02e6b4 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -11,9 +11,9 @@ @parse url, title """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] @@ -90,7 +90,7 @@ def request(query, params): # if our language is hosted on the main site, we need to add its name # to the query in order to narrow the results to that language if language in main_langs: - query += b' (' + (main_langs[language]).encode('utf-8') + b')' + query += b' (' + (main_langs[language]).encode() + b')' # prepare the request parameters query = urlencode({'search': query}) diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index b139c2a9f..1d71b18e9 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -14,8 +14,8 @@ import re from json import loads +from urllib.parse import urlencode # from searx import logger -from searx.url_utils import urlencode from searx.poolrequests import get # engine dependent config diff --git a/searx/engines/github.py b/searx/engines/github.py index eaa00da4f..80b50ceda 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/google.py b/searx/engines/google.py index 093ad6bd7..dfc8a0ab8 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -18,11 +18,11 @@ Definitions`_. 
# pylint: disable=invalid-name, missing-function-docstring +from urllib.parse import urlencode, urlparse from lxml import html from flask_babel import gettext from searx.engines.xpath import extract_text from searx import logger -from searx.url_utils import urlencode, urlparse from searx.utils import match_language, eval_xpath logger = logger.getChild('google engine') diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index f0e9e27e3..9dd5fad2c 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -24,11 +24,10 @@ Definitions`_. """ -import urllib +from urllib.parse import urlencode, urlparse, unquote from lxml import html from flask_babel import gettext from searx import logger -from searx.url_utils import urlencode, urlparse from searx.utils import eval_xpath from searx.engines.xpath import extract_text @@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id): if 'gstatic.com/images' in line and data_id in line: url_line = _script[i + 1] img_url = url_line.split('"')[1] - img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%')) + img_url = unquote(img_url.replace(r'\u00', r'%')) return img_url diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py index c9cc75435..08875328c 100644 --- a/searx/engines/google_news.py +++ b/searx/engines/google_news.py @@ -10,9 +10,9 @@ @parse url, title, content, publishedDate """ +from urllib.parse import urlencode from lxml import html from searx.engines.google import _fetch_supported_languages, supported_languages_url -from searx.url_utils import urlencode from searx.utils import match_language # search-url diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index fd6b2e3be..08af55902 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -12,9 +12,9 @@ from datetime import date, timedelta from json import loads +from urllib.parse import urlencode from lxml import html from searx.engines.xpath 
import extract_text -from searx.url_utils import urlencode import re # engine dependent config diff --git a/searx/engines/ina.py b/searx/engines/ina.py index ea509649f..cce580273 100644 --- a/searx/engines/ina.py +++ b/searx/engines/ina.py @@ -12,15 +12,12 @@ # @todo embedded (needs some md5 from video page) from json import loads +from urllib.parse import urlencode from lxml import html from dateutil import parser +from html.parser import HTMLParser from searx.engines.xpath import extract_text -from searx.url_utils import urlencode -try: - from HTMLParser import HTMLParser -except: - from html.parser import HTMLParser # engine dependent config categories = ['videos'] diff --git a/searx/engines/invidious.py b/searx/engines/invidious.py index cf76fd215..6ea942699 100644 --- a/searx/engines/invidious.py +++ b/searx/engines/invidious.py @@ -8,7 +8,7 @@ # @stable yes # @parse url, title, content, publishedDate, thumbnail, embedded, author, length -from searx.url_utils import quote_plus +from urllib.parse import quote_plus from dateutil import parser import time diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 785b0c490..1e5c39ac4 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -1,11 +1,8 @@ from collections import Iterable from json import loads -from sys import version_info -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import to_string -if version_info[0] == 3: - unicode = str search_url = None url_query = None @@ -37,8 +34,6 @@ def iterate(iterable): def is_iterable(obj): if type(obj) == str: return False - if type(obj) == unicode: - return False return isinstance(obj, Iterable) diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 5e897c96f..af48d990b 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -12,9 +12,9 @@ from lxml import html from operator import itemgetter +from urllib.parse import quote, urljoin from 
searx.engines.xpath import extract_text from searx.utils import get_torrent_size, convert_str_to_int -from searx.url_utils import quote, urljoin # engine dependent config categories = ['videos', 'music', 'files'] diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 0607ac93b..50ba74efc 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -14,7 +14,7 @@ from json import loads from string import Formatter -from searx.url_utils import urlencode, quote +from urllib.parse import urlencode, quote # engine dependent config categories = ['general'] @@ -79,7 +79,7 @@ def response(resp): if result.get('snippet', '').startswith('#REDIRECT'): continue url = base_url.format(language=resp.search_params['language']) +\ - 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')) + 'wiki/' + quote(result['title'].replace(' ', '_').encode()) # append result results.append({'url': url, diff --git a/searx/engines/microsoft_academic.py b/searx/engines/microsoft_academic.py index 9bac0069c..7426eef7e 100644 --- a/searx/engines/microsoft_academic.py +++ b/searx/engines/microsoft_academic.py @@ -12,8 +12,7 @@ Microsoft Academic (Science) from datetime import datetime from json import loads from uuid import uuid4 - -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text categories = ['images'] diff --git a/searx/engines/mixcloud.py b/searx/engines/mixcloud.py index 470c007ea..0606350a9 100644 --- a/searx/engines/mixcloud.py +++ b/searx/engines/mixcloud.py @@ -12,7 +12,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['music'] diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index c57979a5f..ed8897ddc 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -10,8 +10,8 @@ """ from lxml import html +from urllib.parse import urlencode from 
searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py index 257b1a1b3..48a2a14b7 100644 --- a/searx/engines/openstreetmap.py +++ b/searx/engines/openstreetmap.py @@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)') # do search-request def request(query, params): - params['url'] = base_url + search_string.format(query=query.decode('utf-8')) - params['route'] = route_re.match(query.decode('utf-8')) + params['url'] = base_url + search_string.format(query=query.decode()) + params['route'] = route_re.match(query.decode()) return params @@ -52,7 +52,7 @@ def response(resp): if 'display_name' not in r: continue - title = r['display_name'] or u'' + title = r['display_name'] or '' osm_type = r.get('osm_type', r.get('type')) url = result_base_url.format(osm_type=osm_type, osm_id=r['osm_id']) @@ -64,7 +64,7 @@ def response(resp): # if no geojson is found and osm_type is a node, add geojson Point if not geojson and osm_type == 'node': - geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} + geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]} address_raw = r.get('address') address = {} diff --git a/searx/engines/peertube.py b/searx/engines/peertube.py index b3795bf83..58ff38c02 100644 --- a/searx/engines/peertube.py +++ b/searx/engines/peertube.py @@ -14,7 +14,7 @@ from json import loads from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text # engine dependent config diff --git a/searx/engines/photon.py b/searx/engines/photon.py index 15236f680..9201fc168 100644 --- a/searx/engines/photon.py +++ b/searx/engines/photon.py @@ -11,8 +11,8 @@ """ from json import loads +from urllib.parse import urlencode from searx.utils import searx_useragent -from searx.url_utils import 
urlencode # engine dependent config categories = ['map'] diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 0122d6daa..42866d058 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -11,7 +11,9 @@ from json import loads from datetime import datetime from operator import itemgetter -from searx.url_utils import quote + +from urllib.parse import quote, urljoin +from searx.engines.xpath import extract_text from searx.utils import get_torrent_size # engine dependent config @@ -62,8 +64,8 @@ def response(resp): # parse results for result in search_res: link = url + "description.php?id=" + result["id"] - magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \ - "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) + magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\ + + "&tr=" + "&tr=".join(trackers) params = { "url": link, diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 055f09226..7eb2e92f9 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -14,7 +14,7 @@ from flask_babel import gettext from lxml import etree from datetime import datetime -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.poolrequests import get diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py index 54e9dafad..ac918b905 100644 --- a/searx/engines/qwant.py +++ b/searx/engines/qwant.py @@ -12,9 +12,9 @@ from datetime import datetime from json import loads -from searx.utils import html_to_text -from searx.url_utils import urlencode -from searx.utils import match_language +from urllib.parse import urlencode +from searx.utils import html_to_text, match_language + # engine dependent config categories = None diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index d19724906..e732875cb 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -12,7 +12,7 @@ import json from datetime import datetime -from 
searx.url_utils import urlencode, urljoin, urlparse +from urllib.parse import urlencode, urljoin, urlparse # engine dependent config categories = ['general', 'images', 'news', 'social media'] diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 7208dcb70..3ed6c6fd5 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -11,7 +11,7 @@ """ from json import loads, dumps from searx.utils import html_to_text # engine dependent config categories = ['science'] @@ -29,7 +29,7 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['Content-type'] = "application/json" - params['data'] = dumps({"query": query.decode('utf-8'), + params['data'] = dumps({"query": query.decode(), "searchField": "ALL", "sortDirection": "ASC", "sortOrder": "RELEVANCY", diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 789e8e7a9..706285814 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 4b8e9a84a..878d2e792 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['it'] diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py index f9b1f99c8..3778abe7b 100644 --- a/searx/engines/seedpeer.py +++ b/searx/engines/seedpeer.py @@ -11,7 +11,7 @@ from lxml import html from json import loads from operator import itemgetter -from searx.url_utils import quote, urljoin +from urllib.parse import quote, urljoin from searx.engines.xpath import extract_text diff --git 
a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 284689bf6..5165ea3ea 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -14,14 +14,11 @@ import re from json import loads from lxml import html from dateutil import parser +from io import StringIO +from urllib.parse import quote_plus, urlencode from searx import logger from searx.poolrequests import get as http_get -from searx.url_utils import quote_plus, urlencode -try: - from cStringIO import StringIO -except: - from io import StringIO # engine dependent config categories = ['music'] @@ -61,7 +58,7 @@ def get_client_id(): # gets app_js and searches for the clientid response = http_get(app_js_url) if response.ok: - cids = cid_re.search(response.content.decode("utf-8")) + cids = cid_re.search(response.content.decode()) if cids is not None and len(cids.groups()): return cids.groups()[0] logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 00c395706..74942326e 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -11,7 +11,7 @@ """ from json import loads -from searx.url_utils import urlencode +from urllib.parse import urlencode import requests import base64 @@ -39,8 +39,8 @@ def request(query, params): 'https://accounts.spotify.com/api/token', data={'grant_type': 'client_credentials'}, headers={'Authorization': 'Basic ' + base64.b64encode( - "{}:{}".format(api_client_id, api_client_secret).encode('utf-8') - ).decode('utf-8')} + "{}:{}".format(api_client_id, api_client_secret).encode() + ).decode()} ) j = loads(r.text) params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} @@ -59,7 +59,7 @@ def response(resp): if result['type'] == 'track': title = result['name'] url = result['external_urls']['spotify'] - content = u'{} - {} - {}'.format( + content = '{} - {} - {}'.format( result['artists'][0]['name'], result['album']['name'], 
result['name']) diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 25875aa15..90e4543d7 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -10,9 +10,9 @@ @parse url, title, content """ +from urllib.parse import urlencode, urljoin from lxml import html from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['it'] diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 773212043..9c8774d7c 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -11,10 +11,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text from datetime import datetime -from searx.url_utils import urlencode from searx.utils import get_torrent_size, int_or_zero # engine dependent config diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py index c5e515acf..fcc8c042c 100644 --- a/searx/engines/torrentz.py +++ b/searx/engines/torrentz.py @@ -12,10 +12,10 @@ """ import re +from urllib.parse import urlencode from lxml import html from datetime import datetime from searx.engines.xpath import extract_text -from searx.url_utils import urlencode from searx.utils import get_torrent_size # engine dependent config diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 6cb18ff39..079eebe3c 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -12,8 +12,8 @@ import re from searx.utils import is_valid_lang categories = ['general'] -url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' -web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' +web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' weight = 100 parser_re 
= re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) @@ -39,9 +39,9 @@ def request(query, params): key_form = '' params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1], - query=query.decode('utf-8'), + query=query.decode(), key=key_form) - params['query'] = query.decode('utf-8') + params['query'] = query.decode() params['from_lang'] = from_lang params['to_lang'] = to_lang diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index d2a8d2088..549b14e96 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -12,10 +12,10 @@ @todo publishedDate """ +from urllib.parse import urlencode, urljoin from lxml import html from datetime import datetime from searx.engines.xpath import extract_text -from searx.url_utils import urlencode, urljoin # engine dependent config categories = ['social media'] diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 2e8d6fdfc..45c6b30da 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -10,7 +10,7 @@ @parse url, title, img_src, thumbnail_src """ -from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl +from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl from json import loads url = 'https://unsplash.com/' diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index a92271019..fd3abc858 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -12,9 +12,9 @@ # @todo rewrite to api # @todo set content-parameter with correct data +from urllib.parse import urlencode from json import loads from dateutil import parser -from searx.url_utils import urlencode # engine dependent config categories = ['videos'] diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index eb7e1dc71..ffa3724fd 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -15,9 +15,9 @@ from searx import logger from searx.poolrequests import get from searx.engines.xpath import extract_text from searx.engines.wikipedia 
import _fetch_supported_languages, supported_languages_url -from searx.url_utils import urlencode from searx.utils import match_language, eval_xpath +from urllib.parse import urlencode from json import loads from lxml.html import fromstring from lxml import etree @@ -76,7 +76,7 @@ def request(query, params): def response(resp): results = [] htmlparser = etree.HTMLParser() - html = fromstring(resp.content.decode("utf-8"), parser=htmlparser) + html = fromstring(resp.content.decode(), parser=htmlparser) search_results = eval_xpath(html, wikidata_ids_xpath) if resp.search_params['language'].split('-')[0] == 'all': @@ -89,7 +89,7 @@ def response(resp): wikidata_id = search_result.split('/')[-1] url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language})) htmlresponse = get(url) - jsonresponse = loads(htmlresponse.content.decode("utf-8")) + jsonresponse = loads(htmlresponse.content.decode()) results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser) return results @@ -453,16 +453,16 @@ def get_geolink(result): latitude, longitude = coordinates.split(',') # convert to decimal - lat = int(latitude[:latitude.find(u'°')]) + lat = int(latitude[:latitude.find('°')]) if latitude.find('\'') >= 0: - lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0 + lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0 if latitude.find('"') >= 0: lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0 if latitude.find('S') >= 0: lat *= -1 - lon = int(longitude[:longitude.find(u'°')]) + lon = int(longitude[:longitude.find('°')]) if longitude.find('\'') >= 0: - lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0 + lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0 if longitude.find('"') >= 0: lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0 if longitude.find('W') >= 
0: diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index bff24d16b..620ec3c14 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -10,13 +10,13 @@ @parse url, infobox """ +from urllib.parse import quote from json import loads from lxml.html import fromstring -from searx.url_utils import quote from searx.utils import match_language, searx_useragent # search-url -search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' +search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 1c58c4a9b..520eaa209 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -9,7 +9,7 @@ # @parse url, infobox from lxml import etree -from searx.url_utils import urlencode +from urllib.parse import urlencode # search-url search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' @@ -45,15 +45,15 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = {u'\uf522': u'\u2192', # rigth arrow - u'\uf7b1': u'\u2115', # set of natural numbers - u'\uf7b4': u'\u211a', # set of rational numbers - u'\uf7b5': u'\u211d', # set of real numbers - u'\uf7bd': u'\u2124', # set of integer numbers - u'\uf74c': 'd', # differential - u'\uf74d': u'\u212f', # euler's number - u'\uf74e': 'i', # imaginary number - u'\uf7d9': '='} # equals sign + pua_chars = {'\uf522': '\u2192', # rigth arrow + '\uf7b1': '\u2115', # set of natural numbers + '\uf7b4': '\u211a', # set of rational numbers + '\uf7b5': '\u211d', # set of real numbers + '\uf7bd': '\u2124', # set of integer numbers + '\uf74c': 'd', # differential + '\uf74d': '\u212f', # euler's number + '\uf74e': 'i', # imaginary number + '\uf7d9': '='} # equals sign for k, v in 
pua_chars.items(): text = text.replace(k, v) diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 387c9fa17..943d4f3fb 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -10,9 +10,9 @@ from json import loads from time import time +from urllib.parse import urlencode from searx.poolrequests import get as http_get -from searx.url_utils import urlencode # search-url url = 'https://www.wolframalpha.com/' diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index f1154b16d..1cb74dbad 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -11,7 +11,7 @@ """ from lxml import html -from searx.url_utils import urlencode, urljoin +from urllib.parse import urlencode, urljoin from searx.engines.xpath import extract_text # engine dependent config diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index a9f3e4bdd..bd97a93a5 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,7 +1,7 @@ +from urllib.parse import unquote, urlencode, urljoin, urlparse from lxml import html from lxml.etree import _ElementStringResult, _ElementUnicodeResult from searx.utils import html_to_text, eval_xpath -from searx.url_utils import unquote, urlencode, urljoin, urlparse search_url = None url_xpath = None @@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url): if url.startswith('//'): # add http or https to this kind of url //example.com/ parsed_search_url = urlparse(search_url) - url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url) + url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url) elif url.startswith('/'): # fix relative url to the search engine url = urljoin(search_url, url) @@ -86,7 +86,7 @@ def normalize_url(url): p = parsed_url.path mark = p.find('/**') if mark != -1: - return unquote(p[mark + 3:]).decode('utf-8') + return unquote(p[mark + 3:]).decode() return url diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 
f1d4c6abe..daa151082 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -14,7 +14,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode from searx.utils import html_to_text diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index a6b4aeb9f..0133b57b5 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -11,9 +11,9 @@ @parse url, title, content, suggestion """ +from urllib.parse import unquote, urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url -from searx.url_utils import unquote, urlencode from searx.utils import match_language, eval_xpath # engine dependent config diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index 9f6a4159b..345e4d91f 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -11,13 +11,13 @@ import re from datetime import datetime, timedelta +from urllib.parse import urlencode from lxml import html from searx.engines.xpath import extract_text, extract_url from searx.engines.yahoo import ( parse_url, _fetch_supported_languages, supported_languages_url, language_aliases ) from dateutil import parser -from searx.url_utils import urlencode from searx.utils import match_language # engine dependent config @@ -58,7 +58,7 @@ def request(query, params): def sanitize_url(url): if ".yahoo.com/" in url: - return re.sub(u"\\;\\_ylt\\=.+$", "", url) + return re.sub("\\;\\_ylt\\=.+$", "", url) else: return url diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py index 1c789f6cb..ff1ef5a26 100644 --- a/searx/engines/yandex.py +++ b/searx/engines/yandex.py @@ -9,9 +9,9 @@ @parse url, title, content """ +from urllib.parse import urlencode from lxml import html from searx import logger -from searx.url_utils import urlencode logger = logger.getChild('yandex engine') diff --git a/searx/engines/yggtorrent.py b/searx/engines/yggtorrent.py index 
739574e8d..37bf3b1d9 100644 --- a/searx/engines/yggtorrent.py +++ b/searx/engines/yggtorrent.py @@ -11,8 +11,8 @@ from lxml import html from operator import itemgetter from datetime import datetime +from urllib.parse import quote from searx.engines.xpath import extract_text -from searx.url_utils import quote from searx.utils import get_torrent_size from searx.poolrequests import get as http_get diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index bc4c0d58e..2542169a6 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -10,7 +10,7 @@ from json import loads from dateutil import parser -from searx.url_utils import urlencode +from urllib.parse import urlencode # engine dependent config categories = ['videos', 'music'] diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 68a3739a2..fef501458 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -10,9 +10,9 @@ from functools import reduce from json import loads +from urllib.parse import quote_plus from searx.engines.xpath import extract_text from searx.utils import list_get -from searx.url_utils import quote_plus # engine dependent config categories = ['videos', 'music'] |