author     Pydo <pydo@tutanota.com>  2016-10-01 10:46:18 -0400
committer  Pydo <pydo@tutanota.com>  2016-10-01 10:46:18 -0400
commit     55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea (patch)
tree       96e953057dd3fc29681039f7ac5b282dac189ee8 /searx/engines
parent     6f87bf2a1c76f1b94ad2119df7fb938c2307e370 (diff)
parent     295fc9ce96d8cca9c6c4776a00e5fb0942eb6f4d (diff)
Merge branch 'master' of https://github.com/asciimoo/searx into feature/seedpeer-engine-integration
Resolved conflict searx/settings.yml
Diffstat (limited to 'searx/engines')
-rw-r--r--  searx/engines/__init__.py            12
-rw-r--r--  searx/engines/dictzone.py            69
-rw-r--r--  searx/engines/digbt.py                2
-rw-r--r--  searx/engines/translated.py          65
-rw-r--r--  searx/engines/wolframalpha_api.py    16
-rw-r--r--  searx/engines/wolframalpha_noapi.py  16
6 files changed, 168 insertions, 12 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 782b622b0..14376c31f 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -57,11 +57,17 @@ def load_module(filename):
def load_engine(engine_data):
-    engine_name = engine_data['engine']
+
+    if '_' in engine_data['name']:
+        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
+        sys.exit(1)
+
+    engine_module = engine_data['engine']
+
    try:
-        engine = load_module(engine_name + '.py')
+        engine = load_module(engine_module + '.py')
    except:
-        logger.exception('Cannot load engine "{}"'.format(engine_name))
+        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None
    for param_name in engine_data:
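Outside the diff, a minimal sketch of what the new check does: the user-facing 'name' from settings.yml must not contain an underscore, while the separate 'engine' key names the module file to import. The engine_data dict and logger setup below are illustrative, not taken from settings.yml.

import sys
import logging

logger = logging.getLogger('searx.engines')  # stands in for the module's logger

# illustrative settings entry; real entries come from settings.yml
engine_data = {
    'name': 'seedpeer',    # user-facing name, must not contain '_'
    'engine': 'seedpeer',  # module file searx/engines/seedpeer.py
}

if '_' in engine_data['name']:
    logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
    sys.exit(1)

engine_module = engine_data['engine']  # later passed to load_module(engine_module + '.py')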
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
new file mode 100644
index 000000000..9765d5f60
--- /dev/null
+++ b/searx/engines/dictzone.py
@@ -0,0 +1,69 @@
+"""
+ Dictzone
+
+ @website https://dictzone.com/
+ @provide-api no
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+"""
+
+import re
+from urlparse import urljoin
+from lxml import html
+from cgi import escape
+from searx.utils import is_valid_lang
+
+categories = ['general']
+url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+weight = 100
+
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+results_xpath = './/table[@id="r"]/tr'
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ params['url'] = url.format(from_lang=from_lang[2],
+ to_lang=to_lang[2],
+ query=query)
+
+ return params
+
+
+def response(resp):
+ results = []
+
+ dom = html.fromstring(resp.text)
+
+ for k, result in enumerate(dom.xpath(results_xpath)[1:]):
+ try:
+ from_result, to_results_raw = result.xpath('./td')
+ except:
+ continue
+
+ to_results = []
+ for to_result in to_results_raw.xpath('./p/a'):
+ t = to_result.text_content()
+ if t.strip():
+ to_results.append(to_result.text_content())
+
+ results.append({
+ 'url': urljoin(resp.url, '?%d' % k),
+ 'title': escape(from_result.text_content()),
+ 'content': escape('; '.join(to_results))
+ })
+
+ return results
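The query syntax dictzone.py accepts follows from parser_re: two language identifiers joined by a dash, then a single word to look up. A standalone sketch with a made-up query:

# -*- coding: utf-8 -*-
import re

parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

# e.g. "en-es house" splits into source language, target language and the word
m = parser_re.match(u'en-es house')
assert m.groups() == (u'en', u'es', u'house')

# multi-word input does not match, so request() would return params unchanged
assert parser_re.match(u'en-es two words') is None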
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index c35327e8c..b55d7747a 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -40,7 +40,7 @@ def response(resp):
    results = list()
    for result in search_res:
        url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
-        title = result.xpath('.//a[@title]/text()')[0]
+        title = extract_text(result.xpath('.//a[@title]'))
        content = extract_text(result.xpath('.//div[@class="files"]'))
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
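The digbt change replaces a raw text() lookup with searx's extract_text helper, which matters when the title text is split across child elements. A standalone lxml sketch with made-up markup, using text_content() as a rough stand-in for extract_text:

from lxml import html

# a result title whose middle part sits in a child element
dom = html.fromstring('<div><a title="t">Ubuntu <b>16.04</b> iso</a></div>')
link = dom.xpath('.//a[@title]')[0]

print(link.xpath('./text()'))  # ['Ubuntu ', ' iso'] - taking [0] alone drops '16.04'
print(link.text_content())     # 'Ubuntu 16.04 iso'  - all descendant text, in order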
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
new file mode 100644
index 000000000..02047bc93
--- /dev/null
+++ b/searx/engines/translated.py
@@ -0,0 +1,65 @@
+"""
+ MyMemory Translated
+
+ @website https://mymemory.translated.net/
+ @provide-api yes (https://mymemory.translated.net/doc/spec.php)
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content
+"""
+import re
+from cgi import escape
+from searx.utils import is_valid_lang
+
+categories = ['general']
+url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+weight = 100
+
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
+api_key = ''
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ if api_key:
+ key_form = '&key=' + api_key
+ else:
+ key_form = ''
+ params['url'] = url.format(from_lang=from_lang[1],
+ to_lang=to_lang[1],
+ query=query,
+ key=key_form)
+ params['query'] = query
+ params['from_lang'] = from_lang
+ params['to_lang'] = to_lang
+
+ return params
+
+
+def response(resp):
+ results = []
+ results.append({
+ 'url': escape(web_url.format(
+ from_lang=resp.search_params['from_lang'][2],
+ to_lang=resp.search_params['to_lang'][2],
+ query=resp.search_params['query'])),
+ 'title': escape('[{0}-{1}] {2}'.format(
+ resp.search_params['from_lang'][1],
+ resp.search_params['to_lang'][1],
+ resp.search_params['query'])),
+ 'content': escape(resp.json()['responseData']['translatedText'])
+ })
+ return results
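A standalone sketch of how the MyMemory API URL above is assembled for a query like "en-es hello". The tuple shape used for from_lang/to_lang (a flag, a two-letter code, an English name) is an assumption about what searx.utils.is_valid_lang returns, inferred from the [1]/[2] indexing in the engine:

# -*- coding: utf-8 -*-
api_url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'

from_lang = (True, u'en', u'english')  # assumed is_valid_lang() shape
to_lang = (True, u'es', u'spanish')
api_key = ''

key_form = '&key=' + api_key if api_key else ''
print(api_url.format(from_lang=from_lang[1], to_lang=to_lang[1],
                     query=u'hello', key=key_form))
# http://api.mymemory.translated.net/get?q=hello&langpair=en|es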
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 4526c825f..e743c8f56 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml
# xpath variables
failure_xpath = '/queryresult[attribute::success="false"]'
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
pods_xpath = '//pod'
subpods_xpath = './subpod'
+pod_primary_xpath = './@primary'
pod_id_xpath = './@id'
pod_title_xpath = './@title'
plaintext_xpath = './plaintext'
@@ -75,13 +75,15 @@ def response(resp):
    try:
        infobox_title = search_results.xpath(input_xpath)[0].text
    except:
-        infobox_title = None
+        infobox_title = ""
    pods = search_results.xpath(pods_xpath)
    result_chunks = []
+    result_content = ""
    for pod in pods:
        pod_id = pod.xpath(pod_id_xpath)[0]
        pod_title = pod.xpath(pod_title_xpath)[0]
+        pod_is_result = pod.xpath(pod_primary_xpath)
        subpods = pod.xpath(subpods_xpath)
        if not subpods:
@@ -94,6 +96,10 @@ def response(resp):
        if content and pod_id not in image_pods:
+            if pod_is_result or not result_content:
+                if pod_id != "Input":
+                    result_content = "%s: %s" % (pod_title, content)
+
            # if no input pod was found, title is first plaintext pod
            if not infobox_title:
                infobox_title = content
@@ -109,6 +115,8 @@ def response(resp):
    if not result_chunks:
        return []
+    title = "Wolfram|Alpha (%s)" % infobox_title
+
    # append infobox
    results.append({'infobox': infobox_title,
                    'attributes': result_chunks,
@@ -116,7 +124,7 @@ def response(resp):
    # append link to site
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': title,
+                    'content': result_content})
    return results
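Both Wolfram|Alpha engines now keep one pod's plaintext as the link result's content: a primary pod wins, otherwise the first non-Input pod that produced text. A standalone rendering of that selection with a made-up pod list (not real API output):

pods = [
    {'id': 'Input',  'title': 'Input',  'primary': False, 'plaintext': u'2+2'},
    {'id': 'Result', 'title': 'Result', 'primary': True,  'plaintext': u'4'},
]

result_content = ""
for pod in pods:
    if pod['plaintext'] and pod['id'] != 'Input':
        # a primary pod always overrides; otherwise keep the first candidate
        if pod['primary'] or not result_content:
            result_content = "%s: %s" % (pod['title'], pod['plaintext'])

print(result_content)  # Result: 4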
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 3a8180f04..e318d93e6 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,9 +8,11 @@
# @stable no
# @parse url, infobox
+from cgi import escape
from json import loads
from time import time
from urllib import urlencode
+from lxml.etree import XML
from searx.poolrequests import get as http_get
@@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
referer_url = url + 'input/?{query}'
token = {'value': '',
-         'last_updated': 0}
+         'last_updated': None}
# pods to display as image in infobox
# these pods do return plaintext, but they look better and are more useful as images
@@ -80,10 +82,12 @@ def response(resp):
    # TODO handle resp_json['queryresult']['assumptions']
    result_chunks = []
-    infobox_title = None
+    infobox_title = ""
+    result_content = ""
    for pod in resp_json['queryresult']['pods']:
        pod_id = pod.get('id', '')
        pod_title = pod.get('title', '')
+        pod_is_result = pod.get('primary', None)
        if 'subpods' not in pod:
            continue
@@ -97,6 +101,10 @@ def response(resp):
                if subpod['plaintext'] != '(requires interactivity)':
                    result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
+                    if pod_is_result or not result_content:
+                        if pod_id != "Input":
+                            result_content = pod_title + ': ' + subpod['plaintext']
+
            elif 'img' in subpod:
                result_chunks.append({'label': pod_title, 'image': subpod['img']})
@@ -108,7 +116,7 @@ def response(resp):
                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
+                    'content': result_content})
    return results
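In the JSON variant the same rule hinges on pod.get('primary', None): pods other than the main result usually carry no 'primary' key at all, so the lookup stays falsy and only an explicitly primary pod forces the override. A small illustration with made-up pods:

pods = [
    {'id': 'Input', 'title': 'Input interpretation'},
    {'id': 'Result', 'title': 'Result', 'primary': True},
]

for pod in pods:
    pod_is_result = pod.get('primary', None)  # None when the key is absent
    print('{0}: primary={1}'.format(pod['id'], bool(pod_is_result)))
# Input: primary=False
# Result: primary=True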