author     Pydo <pydo@tutanota.com>  2016-10-01 10:46:18 -0400
committer  Pydo <pydo@tutanota.com>  2016-10-01 10:46:18 -0400
commit     55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea (patch)
tree       96e953057dd3fc29681039f7ac5b282dac189ee8 /searx/engines
parent     6f87bf2a1c76f1b94ad2119df7fb938c2307e370 (diff)
parent     295fc9ce96d8cca9c6c4776a00e5fb0942eb6f4d (diff)
Merge branch 'master' of https://github.com/asciimoo/searx into feature/seedpeer-engine-integration
Resolved conflict searx/settings.yml
Diffstat (limited to 'searx/engines')
-rw-r--r--  searx/engines/__init__.py            12
-rw-r--r--  searx/engines/dictzone.py            69
-rw-r--r--  searx/engines/digbt.py                2
-rw-r--r--  searx/engines/translated.py          65
-rw-r--r--  searx/engines/wolframalpha_api.py    16
-rw-r--r--  searx/engines/wolframalpha_noapi.py  16
6 files changed, 168 insertions, 12 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 782b622b0..14376c31f 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -57,11 +57,17 @@ def load_module(filename):
def load_engine(engine_data):
-    engine_name = engine_data['engine']
+
+    if '_' in engine_data['name']:
+        logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
+        sys.exit(1)
+
+    engine_module = engine_data['engine']
+
    try:
-        engine = load_module(engine_name + '.py')
+        engine = load_module(engine_module + '.py')
    except:
-        logger.exception('Cannot load engine "{}"'.format(engine_name))
+        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None
    for param_name in engine_data:
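Outside the diff, a minimal sketch of what the new check does: the user-facing 'name' from settings.yml must not contain an underscore, while the separate 'engine' key names the module file to import. The engine_data dict and logger setup below are illustrative, not taken from settings.yml.

import sys
import logging

logger = logging.getLogger('searx.engines')  # stands in for the module's logger

# illustrative settings entry; real entries come from settings.yml
engine_data = {
    'name': 'seedpeer',    # user-facing name, must not contain '_'
    'engine': 'seedpeer',  # module file searx/engines/seedpeer.py
}

if '_' in engine_data['name']:
    logger.error('Engine name contains underscore: "{}"'.format(engine_data['name']))
    sys.exit(1)

engine_module = engine_data['engine']  # later passed to load_module(engine_module + '.py')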
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
new file mode 100644
index 000000000..9765d5f60
--- /dev/null
+++ b/searx/engines/dictzone.py
@@ -0,0 +1,69 @@
+"""
+ Dictzone
+
+ @website https://dictzone.com/
+ @provide-api no
+ @using-api no
+ @results HTML (using search portal)
+ @stable no (HTML can change)
+ @parse url, title, content
+"""
+
+import re
+from urlparse import urljoin
+from lxml import html
+from cgi import escape
+from searx.utils import is_valid_lang
+
+categories = ['general']
+url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+weight = 100
+
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+results_xpath = './/table[@id="r"]/tr'
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ params['url'] = url.format(from_lang=from_lang[2],
+ to_lang=to_lang[2],
+ query=query)
+
+ return params
+
+
+def response(resp):
+ results = []
+
+ dom = html.fromstring(resp.text)
+
+ for k, result in enumerate(dom.xpath(results_xpath)[1:]):
+ try:
+ from_result, to_results_raw = result.xpath('./td')
+ except:
+ continue
+
+ to_results = []
+ for to_result in to_results_raw.xpath('./p/a'):
+ t = to_result.text_content()
+ if t.strip():
+ to_results.append(to_result.text_content())
+
+ results.append({
+ 'url': urljoin(resp.url, '?%d' % k),
+ 'title': escape(from_result.text_content()),
+ 'content': escape('; '.join(to_results))
+ })
+
+ return results
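The query syntax dictzone.py accepts follows from parser_re: two language identifiers joined by a dash, then a single word to look up. A standalone sketch with a made-up query:

# -*- coding: utf-8 -*-
import re

parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

# e.g. "en-es house" splits into source language, target language and the word
m = parser_re.match(u'en-es house')
assert m.groups() == (u'en', u'es', u'house')

# multi-word input does not match, so request() would return params unchanged
assert parser_re.match(u'en-es two words') is None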
diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py
index c35327e8c..b55d7747a 100644
--- a/searx/engines/digbt.py
+++ b/searx/engines/digbt.py
@@ -40,7 +40,7 @@ def response(resp):
    results = list()
    for result in search_res:
        url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
-        title = result.xpath('.//a[@title]/text()')[0]
+        title = extract_text(result.xpath('.//a[@title]'))
        content = extract_text(result.xpath('.//div[@class="files"]'))
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
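The digbt change replaces a raw text() lookup with searx's extract_text helper, which matters when the title text is split across child elements. A standalone lxml sketch with made-up markup, using text_content() as a rough stand-in for extract_text:

from lxml import html

# a result title whose middle part sits in a child element
dom = html.fromstring('<div><a title="t">Ubuntu <b>16.04</b> iso</a></div>')
link = dom.xpath('.//a[@title]')[0]

print(link.xpath('./text()'))  # ['Ubuntu ', ' iso'] - taking [0] alone drops '16.04'
print(link.text_content())     # 'Ubuntu 16.04 iso'  - all descendant text, in order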
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
new file mode 100644
index 000000000..02047bc93
--- /dev/null
+++ b/searx/engines/translated.py
@@ -0,0 +1,65 @@
+"""
+ MyMemory Translated
+
+ @website https://mymemory.translated.net/
+ @provide-api yes (https://mymemory.translated.net/doc/spec.php)
+ @using-api yes
+ @results JSON
+ @stable yes
+ @parse url, title, content
+"""
+import re
+from cgi import escape
+from searx.utils import is_valid_lang
+
+categories = ['general']
+url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+weight = 100
+
+parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
+api_key = ''
+
+
+def request(query, params):
+ m = parser_re.match(unicode(query, 'utf8'))
+ if not m:
+ return params
+
+ from_lang, to_lang, query = m.groups()
+
+ from_lang = is_valid_lang(from_lang)
+ to_lang = is_valid_lang(to_lang)
+
+ if not from_lang or not to_lang:
+ return params
+
+ if api_key:
+ key_form = '&key=' + api_key
+ else:
+ key_form = ''
+ params['url'] = url.format(from_lang=from_lang[1],
+ to_lang=to_lang[1],
+ query=query,
+ key=key_form)
+ params['query'] = query
+ params['from_lang'] = from_lang
+ params['to_lang'] = to_lang
+
+ return params
+
+
+def response(resp):
+ results = []
+ results.append({
+ 'url': escape(web_url.format(
+ from_lang=resp.search_params['from_lang'][2],
+ to_lang=resp.search_params['to_lang'][2],
+ query=resp.search_params['query'])),
+ 'title': escape('[{0}-{1}] {2}'.format(
+ resp.search_params['from_lang'][1],
+ resp.search_params['to_lang'][1],
+ resp.search_params['query'])),
+ 'content': escape(resp.json()['responseData']['translatedText'])
+ })
+ return results
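A standalone sketch of how the MyMemory API URL above is assembled for a query like "en-es hello". The tuple shape used for from_lang/to_lang (a flag, a two-letter code, an English name) is an assumption about what searx.utils.is_valid_lang returns, inferred from the [1]/[2] indexing in the engine:

# -*- coding: utf-8 -*-
api_url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'

from_lang = (True, u'en', u'english')  # assumed is_valid_lang() shape
to_lang = (True, u'es', u'spanish')
api_key = ''

key_form = '&key=' + api_key if api_key else ''
print(api_url.format(from_lang=from_lang[1], to_lang=to_lang[1],
                     query=u'hello', key=key_form))
# http://api.mymemory.translated.net/get?q=hello&langpair=en|es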
diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index 4526c825f..e743c8f56 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml
# xpath variables
failure_xpath = '/queryresult[attribute::success="false"]'
-answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext'
input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext'
pods_xpath = '//pod'
subpods_xpath = './subpod'
+pod_primary_xpath = './@primary'
pod_id_xpath = './@id'
pod_title_xpath = './@title'
plaintext_xpath = './plaintext'
@@ -75,13 +75,15 @@ def response(resp):
    try:
        infobox_title = search_results.xpath(input_xpath)[0].text
    except:
-        infobox_title = None
+        infobox_title = ""
    pods = search_results.xpath(pods_xpath)
    result_chunks = []
+    result_content = ""
    for pod in pods:
        pod_id = pod.xpath(pod_id_xpath)[0]
        pod_title = pod.xpath(pod_title_xpath)[0]
+        pod_is_result = pod.xpath(pod_primary_xpath)
        subpods = pod.xpath(subpods_xpath)
        if not subpods:
@@ -94,6 +96,10 @@ def response(resp):
        if content and pod_id not in image_pods:
+            if pod_is_result or not result_content:
+                if pod_id != "Input":
+                    result_content = "%s: %s" % (pod_title, content)
+
            # if no input pod was found, title is first plaintext pod
            if not infobox_title:
                infobox_title = content
@@ -109,6 +115,8 @@ def response(resp):
    if not result_chunks:
        return []
+    title = "Wolfram|Alpha (%s)" % infobox_title
+
    # append infobox
    results.append({'infobox': infobox_title,
                    'attributes': result_chunks,
@@ -116,7 +124,7 @@ def response(resp):
    # append link to site
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': title,
+                    'content': result_content})
    return results
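Both Wolfram|Alpha engines now keep one pod's plaintext as the link result's content: a primary pod wins, otherwise the first non-Input pod that produced text. A standalone rendering of that selection with a made-up pod list (not real API output):

pods = [
    {'id': 'Input',  'title': 'Input',  'primary': False, 'plaintext': u'2+2'},
    {'id': 'Result', 'title': 'Result', 'primary': True,  'plaintext': u'4'},
]

result_content = ""
for pod in pods:
    if pod['plaintext'] and pod['id'] != 'Input':
        # a primary pod always overrides; otherwise keep the first candidate
        if pod['primary'] or not result_content:
            result_content = "%s: %s" % (pod['title'], pod['plaintext'])

print(result_content)  # Result: 4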
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 3a8180f04..e318d93e6 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,9 +8,11 @@
# @stable no
# @parse url, infobox
+from cgi import escape
from json import loads
from time import time
from urllib import urlencode
+from lxml.etree import XML
from searx.poolrequests import get as http_get
@@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\
referer_url = url + 'input/?{query}'
token = {'value': '',
-         'last_updated': 0}
+         'last_updated': None}
# pods to display as image in infobox
# these pods do return plaintext, but they look better and are more useful as images
@@ -80,10 +82,12 @@ def response(resp):
    # TODO handle resp_json['queryresult']['assumptions']
    result_chunks = []
-    infobox_title = None
+    infobox_title = ""
+    result_content = ""
    for pod in resp_json['queryresult']['pods']:
        pod_id = pod.get('id', '')
        pod_title = pod.get('title', '')
+        pod_is_result = pod.get('primary', None)
        if 'subpods' not in pod:
            continue
@@ -97,6 +101,10 @@ def response(resp):
                if subpod['plaintext'] != '(requires interactivity)':
                    result_chunks.append({'label': pod_title, 'value': subpod['plaintext']})
+                    if pod_is_result or not result_content:
+                        if pod_id != "Input":
+                            result_content = pod_title + ': ' + subpod['plaintext']
+
            elif 'img' in subpod:
                result_chunks.append({'label': pod_title, 'image': subpod['img']})
@@ -108,7 +116,7 @@ def response(resp):
                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
-                    'title': 'Wolfram|Alpha',
-                    'content': infobox_title})
+                    'title': 'Wolfram|Alpha (' + infobox_title + ')',
+                    'content': result_content})
    return results
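In the JSON variant the same rule hinges on pod.get('primary', None): pods other than the main result usually carry no 'primary' key at all, so the lookup stays falsy and only an explicitly primary pod forces the override. A small illustration with made-up pods:

pods = [
    {'id': 'Input', 'title': 'Input interpretation'},
    {'id': 'Result', 'title': 'Result', 'primary': True},
]

for pod in pods:
    pod_is_result = pod.get('primary', None)  # None when the key is absent
    print('{0}: primary={1}'.format(pod['id'], bool(pod_is_result)))
# Input: primary=False
# Result: primary=True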