summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexandre Flament <alex@al-f.net>, 2020-11-14 13:25:08 +0100
committer: GitHub <noreply@github.com>, 2020-11-14 13:25:08 +0100
commit: c3d9b17c2ad93ddf8b958e0a9f54fbfe62dc679f (patch)
tree: 41982ae9592dd5558473c9fc8e1b404cc78395d3
parent: 102c08838b4f4afe044d8eafcc0de635f1401d35 (diff)
parent: 43e697681efbe7856abe21e6abdac7694447cae8 (diff)
download: searxng-c3d9b17c2ad93ddf8b958e0a9f54fbfe62dc679f.tar.gz
searxng-c3d9b17c2ad93ddf8b958e0a9f54fbfe62dc679f.zip
Merge pull request #2292 from kvch/elasticsearch-engine
New engine: Elasticsearch
-rw-r--r-- searx/engines/elasticsearch.py 142
-rw-r--r-- searx/search.py 3
-rw-r--r-- searx/settings.yml 14
3 files changed, 158 insertions, 1 deletions
diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py
new file mode 100644
index 000000000..bad65fb27
--- /dev/null
+++ b/searx/engines/elasticsearch.py
@@ -0,0 +1,142 @@
+from json import loads, dumps
+from lxml import html
+from urllib.parse import quote, urljoin
+from requests.auth import HTTPBasicAuth
+from searx.utils import extract_text, get_torrent_size
+
+
+# Engine defaults.
+# NOTE(review): the commented settings.yml entry for this engine uses the same
+# names (base_url, username, password, index, query_type, custom_query_json,
+# show_metadata), so these are presumably overridden per instance by the
+# engine loader -- confirm.
+base_url = 'http://localhost:9200'
+# HTTP basic auth is only attached by request() when both values are non-empty.
+username = ''
+password = ''
+# Name of the index to search; init() rejects an empty string.
+index = ''
+# NOTE(review): evaluated once, right here, from the values of base_url and
+# index above (giving 'http://localhost:9200//_search'); it does not follow
+# later changes to base_url or index.
+search_url = base_url + '/' + index + '/_search'
+# Selects the query builder in _available_query_types; request() leaves its
+# params untouched when the value is not one of that table's keys.
+query_type = 'match'
+# Query body used by the 'custom' query type.
+custom_query_json = {}
+# When true, response() adds the hit's _index, _id and _score to each result.
+show_metadata = False
+categories = ['general']
+
+
+def init(engine_settings):
+    """
+    Validate the engine configuration.
+
+    Raises ValueError when engine_settings names a query type that is not a
+    key of _available_query_types, or when the module-level index is an
+    empty string.
+    """
+
+    if 'query_type' in engine_settings:
+        configured_type = engine_settings['query_type']
+        if configured_type not in _available_query_types:
+            raise ValueError('unsupported query type', configured_type)
+
+    if index != '':
+        return
+
+    raise ValueError('index cannot be empty')
+
+
+def request(query, params):
+    """
+    Fill params with the Elasticsearch search request for query.
+
+    The JSON request body is produced by the builder registered in
+    _available_query_types for the configured query_type and is sent with
+    the GET method. params is returned unchanged when query_type is not a
+    key of that table. Basic auth is attached only when both username and
+    password are non-empty.
+
+    Builders may raise ValueError when the query does not have the format
+    they expect.
+    """
+
+    if query_type not in _available_query_types:
+        return params
+
+    if username and password:
+        params['auth'] = HTTPBasicAuth(username, password)
+
+    # Build the URL from the current base_url and index. The module-level
+    # search_url is evaluated once at import time from the default values
+    # (empty index), so it would not reflect the configured settings.
+    params['url'] = base_url.rstrip('/') + '/' + index + '/_search'
+    params['method'] = 'GET'
+    params['data'] = dumps(_available_query_types[query_type](query))
+    params['headers']['Content-Type'] = 'application/json'
+
+    return params
+
+
+def _match_query(query):
+    """
+    The standard for full text queries.
+    searx format: "key:value" e.g. city:berlin
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
+
+    Only the first ":" separates the field name from the text, so the text
+    itself may contain colons (e.g. url:http://example.org).
+
+    Raises ValueError when the query contains no ":".
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be "key:value"')
+
+    return {"query": {"match": {key: {'query': value}}}}
+
+
+def _simple_query_string_query(query):
+    """
+    Wrap the whole query text in a simple_query_string query, a less strict
+    variant of query_string.
+    The field used can be specified in index.query.default_field in Elasticsearch.
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
+    """
+
+    clause = {'query': query}
+    return {'query': {'simple_query_string': clause}}
+
+
+def _term_query(query):
+    """
+    Accepts one term and the name of the field.
+    searx format: "key:value" e.g. city:berlin
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
+
+    Only the first ":" separates the field name from the term, so the term
+    itself may contain colons.
+
+    Raises ValueError when the query contains no ":".
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    return {'query': {'term': {key: value}}}
+
+
+def _terms_query(query):
+    """
+    Accepts multiple terms and the name of the field.
+    searx format: "key:value1,value2" e.g. city:berlin,paris
+    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
+
+    Only the first ":" separates the field name from the comma-separated
+    terms, so the terms themselves may contain colons.
+
+    Raises ValueError when the query contains no ":".
+    """
+
+    key, separator, values = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value1,value2')
+
+    return {'query': {'terms': {key: values.split(',')}}}
+
+
+def _custom_query(query):
+    """
+    Build the request body from the administrator-defined custom_query_json.
+    searx format: "key:value" e.g. city:berlin
+
+    Every dictionary key equal to '{{KEY}}' is replaced by the key and every
+    value equal to '{{VALUE}}' by the value, at any nesting depth (including
+    inside lists). A new structure is returned and custom_query_json itself
+    is never modified, so the placeholders remain available for the next
+    request.
+
+    Only the first ":" separates key from value.
+
+    Raises ValueError when the query contains no ":".
+    """
+
+    key, separator, value = query.partition(':')
+    if not separator:
+        raise ValueError('query format must be key:value')
+
+    def substitute(node):
+        # Rebuild containers instead of editing them in place: changing a
+        # dict while iterating over it is an error, and the template is
+        # shared by all requests.
+        if isinstance(node, dict):
+            return {
+                (key if name == '{{KEY}}' else name): substitute(content)
+                for name, content in node.items()
+            }
+        if isinstance(node, list):
+            return [substitute(item) for item in node]
+        return value if node == '{{VALUE}}' else node
+
+    return substitute(custom_query_json)
+
+
+def response(resp):
+    """
+    Convert the Elasticsearch response into a list of key-value results.
+
+    The response text is decoded as JSON. When the decoded body has an
+    'error' entry, an Exception carrying that entry is raised. Otherwise one
+    result is produced per hit: the fields of its _source are converted to
+    str, except fields whose name starts with '_', which are kept as they
+    are. Every result uses the 'key-value.html' template and, when
+    show_metadata is set, carries the hit's index, id and score.
+    """
+
+    payload = loads(resp.text)
+    if 'error' in payload:
+        raise Exception(payload['error'])
+
+    results = []
+    for hit in payload['hits']['hits']:
+        entry = {}
+        for field, content in hit['_source'].items():
+            entry[field] = content if field.startswith('_') else str(content)
+        entry['template'] = 'key-value.html'
+
+        if show_metadata:
+            entry['metadata'] = {
+                'index': hit['_index'],
+                'id': hit['_id'],
+                'score': hit['_score'],
+            }
+
+        results.append(entry)
+
+    return results
+
+
+# Maps each supported query_type value to the function that builds the
+# request body for it. init() and request() check query types against the
+# keys of this table.
+_available_query_types = {
+ # Full text queries
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
+ 'match': _match_query,
+ 'simple_query_string': _simple_query_string_query,
+
+ # Term-level queries
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
+ 'term': _term_query,
+ 'terms': _terms_query,
+
+ # Query JSON defined by the instance administrator.
+ 'custom': _custom_query,
+}
diff --git a/searx/search.py b/searx/search.py
index 04c6b2885..1cb2a603b 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -126,7 +126,8 @@ def send_http_request(engine, request_params):
req = requests_lib.get
else:
req = requests_lib.post
- request_args['data'] = request_params['data']
+
+ request_args['data'] = request_params['data']
# send the request
return req(request_params['url'], **request_args)
diff --git a/searx/settings.yml b/searx/settings.yml
index 5cab0a102..78ae26b97 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -231,6 +231,20 @@ engines:
shortcut : ew
disabled : True
+# - name : elasticsearch
+# shortcut : es
+# engine : elasticsearch
+# base_url : http://localhost:9200
+# username : elastic
+# password : changeme
+# index : my-index
+# # available options: match, simple_query_string, term, terms, custom
+# query_type : match
+# # if query_type is set to custom, provide your query here
+# #custom_query_json: {"query":{"match_all": {}}}
+# #show_metadata: False
+# disabled : True
+
- name : wikidata
engine : wikidata
shortcut : wd