summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/500px.py 4
-rw-r--r-- searx/engines/__init__.py 4
-rw-r--r-- searx/engines/digg.py 3
-rw-r--r-- searx/engines/flickr-noapi.py 12
-rw-r--r-- searx/engines/flickr.py 31
-rw-r--r-- searx/engines/kickass.py 5
-rw-r--r-- searx/engines/searchcode_code.py 18
-rw-r--r-- searx/engines/searchcode_doc.py 15
-rw-r--r-- searx/engines/subtitleseeker.py 8
-rw-r--r-- searx/engines/twitter.py 7
-rw-r--r-- searx/https_rewrite.py 1
-rw-r--r-- searx/search.py 14
-rw-r--r-- searx/utils.py 5
13 files changed, 80 insertions, 47 deletions
diff --git a/searx/engines/500px.py b/searx/engines/500px.py
index 5d53af32c..3b95619a1 100644
--- a/searx/engines/500px.py
+++ b/searx/engines/500px.py
@@ -35,9 +35,9 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
+
dom = html.fromstring(resp.text)
-
+
# parse results
for result in dom.xpath('//div[@class="photo"]'):
link = result.xpath('.//a')[0]
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index d42339af8..9bc5cdfd4 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -81,7 +81,7 @@ def load_engine(engine_data):
if engine_attr.startswith('_'):
continue
if getattr(engine, engine_attr) is None:
- print('[E] Engine config error: Missing attribute "{0}.{1}"'\
+ print('[E] Engine config error: Missing attribute "{0}.{1}"'
.format(engine.name, engine_attr))
sys.exit(1)
@@ -102,7 +102,7 @@ def load_engine(engine_data):
if engine.shortcut:
# TODO check duplications
if engine.shortcut in engine_shortcuts:
- print('[E] Engine config error: ambigious shortcut: {0}'\
+ print('[E] Engine config error: ambigious shortcut: {0}'
.format(engine.shortcut))
sys.exit(1)
engine_shortcuts[engine.shortcut] = engine.name
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index 4ebfe58c1..241234fdb 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -52,7 +52,8 @@ def response(resp):
thumbnail = result.xpath('.//img')[0].attrib.get('src')
title = ''.join(result.xpath(title_xpath))
content = escape(''.join(result.xpath(content_xpath)))
- publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime'))
+ pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
+ publishedDate = parser.parse(pubdate)
# append result
results.append({'url': url,
diff --git a/searx/engines/flickr-noapi.py b/searx/engines/flickr-noapi.py
index 522503b53..f90903647 100644
--- a/searx/engines/flickr-noapi.py
+++ b/searx/engines/flickr-noapi.py
@@ -53,7 +53,8 @@ def response(resp):
for photo in photos:
- # In paged configuration, the first pages' photos are represented by a None object
+ # In paged configuration, the first pages' photos
+ # are represented by a None object
if photo is None:
continue
@@ -74,10 +75,15 @@ def response(resp):
title = photo['title']
- content = '<span class="photo-author">' + photo['owner']['username'] + '</span><br />'
+ content = '<span class="photo-author">' +\
+ photo['owner']['username'] +\
+ '</span><br />'
if 'description' in photo:
- content = content + '<span class="description">' + photo['description'] + '</span>'
+ content = content +\
+ '<span class="description">' +\
+ photo['description'] +\
+ '</span>'
# append result
results.append({'url': url,
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 2fa5ed7ec..4dadd80a6 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python
## Flickr (Images)
-#
+#
# @website https://www.flickr.com
-# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-#
+# @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
+#
# @using-api yes
# @results JSON
# @stable yes
@@ -18,16 +18,20 @@ categories = ['images']
nb_per_page = 15
paging = True
-api_key= None
+api_key = None
-url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
+ '&api_key={api_key}&{text}&sort=relevance' +\
+ '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\
+ '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
paging = True
+
def build_flickr_url(user_id, photo_id):
- return photo_url.format(userid=user_id,photoid=photo_id)
+ return photo_url.format(userid=user_id, photoid=photo_id)
def request(query, params):
@@ -40,7 +44,7 @@ def request(query, params):
def response(resp):
results = []
-
+
search_results = loads(resp.text)
# return empty array if there are no results
@@ -64,11 +68,14 @@ def response(resp):
url = build_flickr_url(photo['owner'], photo['id'])
title = photo['title']
-
- content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
-
- content = content + '<span class="description">' + photo['description']['_content'] + '</span>'
-
+
+ content = '<span class="photo-author">' +\
+ photo['ownername'] +\
+ '</span><br />' +\
+ '<span class="description">' +\
+ photo['description']['_content'] +\
+ '</span>'
+
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index f1fcd9e1a..16e9d6de6 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'
# specific xpath variables
magnet_xpath = './/a[@title="Torrent magnet link"]'
-#content_xpath = './/font[@class="detDesc"]//text()'
+content_xpath = './/span[@class="font11px lightgrey block"]'
# do search-request
@@ -56,7 +56,8 @@ def response(resp):
link = result.xpath('.//a[@class="cellMainLink"]')[0]
href = urljoin(url, link.attrib['href'])
title = ' '.join(link.xpath('.//text()'))
- content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text"))
+ content = escape(html.tostring(result.xpath(content_xpath)[0],
+ method="text"))
seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py
index 2ba0e52f1..0f98352c1 100644
--- a/searx/engines/searchcode_code.py
+++ b/searx/engines/searchcode_code.py
@@ -11,7 +11,6 @@
from urllib import urlencode
from json import loads
import cgi
-import re
# engine dependent config
categories = ['it']
@@ -33,7 +32,7 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
+
search_results = loads(resp.text)
# parse results
@@ -41,21 +40,22 @@ def response(resp):
href = result['url']
title = "" + result['name'] + " - " + result['filename']
content = result['repo'] + "<br />"
-
+
lines = dict()
for line, code in result['lines'].items():
lines[int(line)] = code
content = content + '<pre class="code-formatter"><table class="code">'
for line, code in sorted(lines.items()):
- content = content + '<tr><td class="line-number" style="padding-right:5px;">'
- content = content + str(line) + '</td><td class="code-snippet">'
- # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary
- content = content + cgi.escape(code).replace('\t', ' ').replace(' ', '&nbsp; ').replace(' ', ' &nbsp;')
+ content = content + '<tr><td class="line-number" style="padding-right:5px;">'
+ content = content + str(line) + '</td><td class="code-snippet">'
+ # Replace every two spaces with ' &nbps;' to keep formatting
+ # while allowing the browser to break the line if necessary
+ content = content + cgi.escape(code).replace('\t', ' ').replace(' ', '&nbsp; ').replace(' ', ' &nbsp;')
content = content + "</td></tr>"
-
+
content = content + "</table></pre>"
-
+
# append result
results.append({'url': href,
'title': title,
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index e07cbeab9..b5b7159be 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -31,15 +31,22 @@ def request(query, params):
# get response from search-request
def response(resp):
results = []
-
+
search_results = loads(resp.text)
# parse results
for result in search_results['results']:
href = result['url']
- title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
- content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
-
+ title = "[" + result['type'] + "] " +\
+ result['namespace'] +\
+ " " + result['name']
+ content = '<span class="highlight">[' +\
+ result['type'] + "] " +\
+ result['name'] + " " +\
+ result['synopsis'] +\
+ "</span><br />" +\
+ result['description']
+
# append result
results.append({'url': href,
'title': title,
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 2f1636f59..c413dcf26 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -60,10 +60,14 @@ def response(resp):
content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
content = content + " - "
- content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text')
+ text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
+ content = content + html.tostring(text, method='text')
if result.xpath(".//span") != []:
- content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")"
+ content = content +\
+ " - (" +\
+ result.xpath(".//span//text()")[0].strip() +\
+ ")"
# append result
results.append({'url': href,
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
index 5a7046c83..bd9a8c2fc 100644
--- a/searx/engines/twitter.py
+++ b/searx/engines/twitter.py
@@ -28,7 +28,7 @@ search_url = base_url+'search?'
results_xpath = '//li[@data-item-type="tweet"]'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/span[@class="username js-action-profile-name"]//text()'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]'
timestamp_xpath = './/span[contains(@class,"_timestamp")]'
@@ -54,10 +54,11 @@ def response(resp):
link = tweet.xpath(link_xpath)[0]
url = urljoin(base_url, link.attrib.get('href'))
title = ''.join(tweet.xpath(title_xpath))
- content = escape(''.join(tweet.xpath(content_xpath)))
+ content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
pubdate = tweet.xpath(timestamp_xpath)
if len(pubdate) > 0:
- publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
+ timestamp = float(pubdate[0].attrib.get('data-time'))
+ publishedDate = datetime.fromtimestamp(timestamp, None)
# append result
results.append({'url': url,
'title': title,
diff --git a/searx/https_rewrite.py b/searx/https_rewrite.py
index 408474a44..d873b406d 100644
--- a/searx/https_rewrite.py
+++ b/searx/https_rewrite.py
@@ -154,7 +154,6 @@ def load_https_rules(rules_path):
print(' * {n} https-rules loaded'.format(n=len(https_rules)))
-
def https_url_rewrite(result):
skip_https_rewrite = False
# check if HTTPS rewrite is possible
diff --git a/searx/search.py b/searx/search.py
index d1d03805f..fbbf3fe41 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -69,11 +69,16 @@ def threaded_requests(requests):
print('engine timeout: {0}'.format(th._engine_name))
-
# get default reqest parameter
def default_request_params():
return {
- 'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}, 'verify': True}
+ 'method': 'GET',
+ 'headers': {},
+ 'data': {},
+ 'url': '',
+ 'cookies': {},
+ 'verify': True
+ }
# create a callback wrapper for the search engine results
@@ -487,14 +492,15 @@ class Search(object):
continue
# append request to list
- requests.append((req, request_params['url'], request_args, selected_engine['name']))
+ requests.append((req, request_params['url'],
+ request_args,
+ selected_engine['name']))
if not requests:
return results, suggestions, answers, infoboxes
# send all search-request
threaded_requests(requests)
-
while not results_queue.empty():
engine_name, engine_results = results_queue.get_nowait()
diff --git a/searx/utils.py b/searx/utils.py
index dc831ef5f..b725a8b95 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -30,8 +30,9 @@ def gen_useragent():
def searx_useragent():
- return 'searx/{searx_version} {suffix}'.format(searx_version=VERSION_STRING,
- suffix=settings['server'].get('useragent_suffix', ''))
+ return 'searx/{searx_version} {suffix}'.format(
+ searx_version=VERSION_STRING,
+ suffix=settings['server'].get('useragent_suffix', ''))
def highlight_content(content, query):