diff options
author | Pydo <pydo@tutanota.com> | 2016-10-01 10:46:18 -0400 |
---|---|---|
committer | Pydo <pydo@tutanota.com> | 2016-10-01 10:46:18 -0400 |
commit | 55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea (patch) | |
tree | 96e953057dd3fc29681039f7ac5b282dac189ee8 /searx/engines | |
parent | 6f87bf2a1c76f1b94ad2119df7fb938c2307e370 (diff) | |
parent | 295fc9ce96d8cca9c6c4776a00e5fb0942eb6f4d (diff) | |
download | searxng-55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea.tar.gz searxng-55a5b686ed6dc0b9a6bfc45e0eaf1f70e24f2aea.zip |
Merge branch 'master' of https://github.com/asciimoo/searx into feature/seedpeer-engine-integration
Resolved conflict searx/settings.yml
Diffstat (limited to 'searx/engines')
-rw-r--r-- | searx/engines/__init__.py | 12 | ||||
-rw-r--r-- | searx/engines/dictzone.py | 69 | ||||
-rw-r--r-- | searx/engines/digbt.py | 2 | ||||
-rw-r--r-- | searx/engines/translated.py | 65 | ||||
-rw-r--r-- | searx/engines/wolframalpha_api.py | 16 | ||||
-rw-r--r-- | searx/engines/wolframalpha_noapi.py | 16 |
6 files changed, 168 insertions, 12 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 782b622b0..14376c31f 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -57,11 +57,17 @@ def load_module(filename): def load_engine(engine_data): - engine_name = engine_data['engine'] + + if '_' in engine_data['name']: + logger.error('Engine name conains underscore: "{}"'.format(engine_data['name'])) + sys.exit(1) + + engine_module = engine_data['engine'] + try: - engine = load_module(engine_name + '.py') + engine = load_module(engine_module + '.py') except: - logger.exception('Cannot load engine "{}"'.format(engine_name)) + logger.exception('Cannot load engine "{}"'.format(engine_module)) return None for param_name in engine_data: diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py new file mode 100644 index 000000000..9765d5f60 --- /dev/null +++ b/searx/engines/dictzone.py @@ -0,0 +1,69 @@ +""" + Dictzone + + @website https://dictzone.com/ + @provide-api no + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content +""" + +import re +from urlparse import urljoin +from lxml import html +from cgi import escape +from searx.utils import is_valid_lang + +categories = ['general'] +url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' +weight = 100 + +parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) +results_xpath = './/table[@id="r"]/tr' + + +def request(query, params): + m = parser_re.match(unicode(query, 'utf8')) + if not m: + return params + + from_lang, to_lang, query = m.groups() + + from_lang = is_valid_lang(from_lang) + to_lang = is_valid_lang(to_lang) + + if not from_lang or not to_lang: + return params + + params['url'] = url.format(from_lang=from_lang[2], + to_lang=to_lang[2], + query=query) + + return params + + +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + for k, result in enumerate(dom.xpath(results_xpath)[1:]): + try: + from_result, to_results_raw = result.xpath('./td') + except: + continue + + to_results = [] + for to_result in to_results_raw.xpath('./p/a'): + t = to_result.text_content() + if t.strip(): + to_results.append(to_result.text_content()) + + results.append({ + 'url': urljoin(resp.url, '?%d' % k), + 'title': escape(from_result.text_content()), + 'content': escape('; '.join(to_results)) + }) + + return results diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index c35327e8c..b55d7747a 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -40,7 +40,7 @@ def response(resp): results = list() for result in search_res: url = urljoin(URL, result.xpath('.//a[@title]/@href')[0]) - title = result.xpath('.//a[@title]/text()')[0] + title = extract_text(result.xpath('.//a[@title]')) content = extract_text(result.xpath('.//div[@class="files"]')) files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) diff --git a/searx/engines/translated.py b/searx/engines/translated.py new file mode 100644 index 000000000..02047bc93 --- /dev/null +++ b/searx/engines/translated.py @@ -0,0 +1,65 @@ +""" + MyMemory Translated + + @website https://mymemory.translated.net/ + @provide-api yes (https://mymemory.translated.net/doc/spec.php) + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" +import re +from cgi import escape +from searx.utils import is_valid_lang + +categories = ['general'] +url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' +web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' +weight = 100 + +parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) +api_key = '' + + +def request(query, params): + m = parser_re.match(unicode(query, 'utf8')) + if not m: + return params + + from_lang, to_lang, query = m.groups() + + from_lang = is_valid_lang(from_lang) + to_lang = is_valid_lang(to_lang) + + if not from_lang or not to_lang: + return params + + if api_key: + key_form = '&key=' + api_key + else: + key_form = '' + params['url'] = url.format(from_lang=from_lang[1], + to_lang=to_lang[1], + query=query, + key=key_form) + params['query'] = query + params['from_lang'] = from_lang + params['to_lang'] = to_lang + + return params + + +def response(resp): + results = [] + results.append({ + 'url': escape(web_url.format( + from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'])), + 'title': escape('[{0}-{1}] {2}'.format( + resp.search_params['from_lang'][1], + resp.search_params['to_lang'][1], + resp.search_params['query'])), + 'content': escape(resp.json()['responseData']['translatedText']) + }) + return results diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 4526c825f..e743c8f56 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -18,10 +18,10 @@ api_key = '' # defined in settings.yml # xpath variables failure_xpath = '/queryresult[attribute::success="false"]' -answer_xpath = '//pod[attribute::primary="true"]/subpod/plaintext' input_xpath = '//pod[starts-with(attribute::id, "Input")]/subpod/plaintext' pods_xpath = '//pod' subpods_xpath = './subpod' +pod_primary_xpath = './@primary' pod_id_xpath = './@id' pod_title_xpath = './@title' plaintext_xpath = './plaintext' @@ -75,13 +75,15 @@ def response(resp): try: infobox_title = search_results.xpath(input_xpath)[0].text except: - infobox_title = None + infobox_title = "" pods = search_results.xpath(pods_xpath) result_chunks = [] + result_content = "" for pod in pods: pod_id = pod.xpath(pod_id_xpath)[0] pod_title = pod.xpath(pod_title_xpath)[0] + pod_is_result = pod.xpath(pod_primary_xpath) subpods = pod.xpath(subpods_xpath) if not subpods: @@ -94,6 +96,10 @@ def response(resp): if content and pod_id not in image_pods: + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = "%s: %s" % (pod_title, content) + # if no input pod was found, title is first plaintext pod if not infobox_title: infobox_title = content @@ -109,6 +115,8 @@ def response(resp): if not result_chunks: return [] + title = "Wolfram|Alpha (%s)" % infobox_title + # append infobox results.append({'infobox': infobox_title, 'attributes': result_chunks, @@ -116,7 +124,7 @@ def response(resp): # append link to site results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': title, + 'content': result_content}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 3a8180f04..e318d93e6 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -8,9 +8,11 @@ # @stable no # @parse url, infobox +from cgi import escape from json import loads from time import time from urllib import urlencode +from lxml.etree import XML from searx.poolrequests import get as http_get @@ -34,7 +36,7 @@ search_url = url + 'input/json.jsp'\ referer_url = url + 'input/?{query}' token = {'value': '', - 'last_updated': 0} + 'last_updated': None} # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images @@ -80,10 +82,12 @@ def response(resp): # TODO handle resp_json['queryresult']['assumptions'] result_chunks = [] - infobox_title = None + infobox_title = "" + result_content = "" for pod in resp_json['queryresult']['pods']: pod_id = pod.get('id', '') pod_title = pod.get('title', '') + pod_is_result = pod.get('primary', None) if 'subpods' not in pod: continue @@ -97,6 +101,10 @@ def response(resp): if subpod['plaintext'] != '(requires interactivity)': result_chunks.append({'label': pod_title, 'value': subpod['plaintext']}) + if pod_is_result or not result_content: + if pod_id != "Input": + result_content = pod_title + ': ' + subpod['plaintext'] + elif 'img' in subpod: result_chunks.append({'label': pod_title, 'image': subpod['img']}) @@ -108,7 +116,7 @@ def response(resp): 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]}) results.append({'url': resp.request.headers['Referer'].decode('utf8'), - 'title': 'Wolfram|Alpha', - 'content': infobox_title}) + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content}) return results |