# Source path: searx/engines/elasticsearch.py (blob c721114a74cd6b7e1a7b9eeff0e74b2b96baf673)
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. sidebar:: info

   - :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
   - `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
   - `Elasticsearch Guide
     <https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
   - `Install Elasticsearch
     <https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_

Elasticsearch_ supports numerous ways to query the data it is storing.  At the
moment the engine supports the most popular search methods (``query_type``):

- ``match``,
- ``simple_query_string``,
- ``term`` and
- ``terms``.

If none of the methods fit your use case, you can select ``custom`` query type
and provide the JSON payload to submit to Elasticsearch in
``custom_query_json``.

Example
=======

The following is an example configuration for an Elasticsearch_ instance with
authentication configured to read from ``my-index`` index.

.. code:: yaml

  - name: elasticsearch
    shortcut: es
    engine: elasticsearch
    base_url: http://localhost:9200
    username: elastic
    password: changeme
    index: my-index
    query_type: match
    # custom_query_json: '{ ... }'
    enable_http: true

"""

from json import loads, dumps
from searx.exceptions import SearxEngineAPIException


# Engine settings (module-level defaults).

# Root URL of the Elasticsearch instance; substituted into ``search_url``.
base_url = 'http://localhost:9200'
# Credentials: ``request`` sets ``params['auth']`` only when both are truthy.
username = ''
password = ''
# Name of the index to search; ``init`` rejects the empty default.
index = ''
# URL template that ``request`` fills with ``base_url`` and ``index``.
search_url = '{base_url}/{index}/_search'
# Key into ``_available_query_types``; selects how the request body is built.
query_type = 'match'
# Query template read by ``_custom_query`` (the ``custom`` query type).
custom_query_json = {}
# When true, ``response`` adds a ``metadata`` entry (index, id, score) to
# every result.
show_metadata = False
# NOTE(review): presumably the searx result categories this engine is listed
# under; the value is not used inside this module -- confirm against the
# engine loader.
categories = ['general']


def init(engine_settings):
    """Validate the engine configuration.

    :param engine_settings: mapping with the settings of this engine.
    :raises ValueError: if ``engine_settings`` contains a ``query_type`` that
        is not a key of ``_available_query_types``, or if the module-level
        ``index`` is the empty string.
    """
    if 'query_type' in engine_settings:
        requested_type = engine_settings['query_type']
        if requested_type not in _available_query_types:
            raise ValueError('unsupported query type', requested_type)

    index_is_missing = index == ''
    if index_is_missing:
        raise ValueError('index cannot be empty')


def request(query, params):
    """Fill ``params`` with the HTTP request for the Elasticsearch search.

    When the configured ``query_type`` is not a key of
    ``_available_query_types``, ``params`` is returned untouched.  Otherwise
    the URL, method, JSON body and ``Content-Type`` header are set, plus
    ``auth`` when both ``username`` and ``password`` are truthy.

    :param query: search terms, passed to the selected query builder.
    :param params: request parameter dict; modified in place.
    :returns: the same ``params`` object.
    """
    try:
        build_body = _available_query_types[query_type]
    except KeyError:
        return params

    if username and password:
        params['auth'] = (username, password)

    params['url'] = search_url.format(index=index, base_url=base_url)
    params['method'] = 'GET'
    # The builder runs only after url/method are set, so that a builder error
    # leaves ``params`` in the same partially filled state as before.
    body = build_body(query)
    params['data'] = dumps(body)
    params['headers']['Content-Type'] = 'application/json'

    return params


def _match_query(query):
    """
    The standard for full text queries.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html

    Only the first ``:`` separates the field name from the search text, so the
    text may itself contain colons (e.g. ``url:http://example.org``).

    :param query: search terms in ``key:value`` form.
    :returns: request body (dict) of a ``match`` query on field ``key``.
    :raises ValueError: if ``query`` contains no ``:``.
    """

    try:
        # maxsplit=1: everything after the first colon belongs to the value.
        key, value = query.split(':', 1)
    except ValueError as e:
        raise ValueError('query format must be "key:value"') from e

    return {"query": {"match": {key: {'query': value}}}}


def _simple_query_string_query(query):
    """
    Build a ``simple_query_string`` request body; it accepts query strings but
    is less strict than ``query_string``.
    The field used can be specified in index.query.default_field in Elasticsearch.
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
    """

    clause = {'query': query}
    body = {'query': {'simple_query_string': clause}}
    return body


def _term_query(query):
    """
    Accepts one term and the name of the field.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html

    Only the first ``:`` separates the field name from the term, so the term
    may itself contain colons (e.g. ``time:12:30``).

    :param query: search terms in ``key:value`` form.
    :returns: request body (dict) of a ``term`` query on field ``key``.
    :raises ValueError: if ``query`` contains no ``:``.
    """

    try:
        # maxsplit=1: everything after the first colon belongs to the term.
        key, value = query.split(':', 1)
    except ValueError as e:
        raise ValueError('query format must be key:value') from e

    return {'query': {'term': {key: value}}}


def _terms_query(query):
    """
    Accepts multiple terms and the name of the field.
    searx format: "key:value1,value2" e.g. city:berlin,paris
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

    Only the first ``:`` separates the field name from the comma separated
    terms, so the terms may themselves contain colons.

    :param query: search terms in ``key:value1,value2`` form.
    :returns: request body (dict) of a ``terms`` query on field ``key``.
    :raises ValueError: if ``query`` contains no ``:``.
    """

    try:
        # maxsplit=1: everything after the first colon is the list of terms.
        key, values = query.split(':', 1)
    except ValueError as e:
        raise ValueError('query format must be key:value1,value2') from e

    return {'query': {'terms': {key: values.split(',')}}}


def _custom_query(query, *, template=None):
    """
    Build the request body from the query JSON defined by the instance
    administrator (``custom_query_json``).
    searx format: "key:value" e.g. city:berlin

    A new structure is returned in which, at any nesting depth, every dict key
    equal to ``{{KEY}}`` is replaced by ``key`` and every value equal to
    ``{{VALUE}}`` is replaced by ``value``.  The template itself is never
    modified, so the placeholders stay available for the next query.

    :param query: search terms in ``key:value`` form; only the first ``:``
        separates key and value.
    :param template: query template to fill; defaults to the module-level
        ``custom_query_json``.  May be a dict or a string holding JSON.
    :returns: the filled-in request body.
    :raises ValueError: if ``query`` contains no ``:`` (or if a string
        template is not valid JSON).
    """
    try:
        key, value = query.split(':', 1)
    except ValueError as e:
        raise ValueError('query format must be key:value') from e

    if template is None:
        template = custom_query_json
    if isinstance(template, str):
        # The configuration example in the module docstring gives
        # ``custom_query_json`` as a quoted string; decode it before use.
        template = loads(template)

    def fill(node):
        # Build fresh containers instead of editing the template: changing a
        # dict while iterating over it is unsafe, and an in-place edit would
        # use up the placeholders of the shared template on the first query.
        if isinstance(node, dict):
            return {(key if name == '{{KEY}}' else name): fill(child) for name, child in node.items()}
        if isinstance(node, list):
            return [fill(child) for child in node]
        if isinstance(node, str) and node == '{{VALUE}}':
            return value
        return node

    return fill(template)


def response(resp):
    """Convert the Elasticsearch answer into a list of searx results.

    Each hit yields one result dict built from the hit's ``_source``: values
    are converted with ``str`` unless their key starts with ``_``.  Every
    result uses the ``key-value.html`` template and, when ``show_metadata`` is
    true, carries a ``metadata`` dict with the hit's index, id and score.

    :param resp: HTTP response whose ``text`` is the JSON body.
    :returns: list of result dicts.
    :raises SearxEngineAPIException: if the JSON body contains ``error``.
    """
    payload = loads(resp.text)
    if 'error' in payload:
        raise SearxEngineAPIException(payload['error'])

    results = []
    for hit in payload['hits']['hits']:
        entry = {}
        for field, field_value in hit['_source'].items():
            # Keys with a leading underscore keep their raw value.
            entry[field] = field_value if field.startswith('_') else str(field_value)
        entry['template'] = 'key-value.html'

        if show_metadata:
            entry['metadata'] = {'index': hit['_index'], 'id': hit['_id'], 'score': hit['_score']}

        results.append(entry)

    return results


# Dispatch table: maps each supported ``query_type`` name to the function that
# builds the request body.  ``init`` validates the configured type against the
# keys and ``request`` calls the selected builder with the search terms.
_available_query_types = {
    # Full text queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    'match': _match_query,
    'simple_query_string': _simple_query_string_query,
    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    'term': _term_query,
    'terms': _terms_query,
    # Query JSON defined by the instance administrator.
    'custom': _custom_query,
}