summaryrefslogtreecommitdiff
path: root/searx/engines/solr.py
blob: 4b80d5729d8b69dfeea5355c434d8dc67cdfbfba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. sidebar:: info

   - :origin:`solr.py <searx/engines/solr.py>`
   - `Solr <https://solr.apache.org>`_
   - `Solr Resources <https://solr.apache.org/resources.html>`_
   - `Install Solr <https://solr.apache.org/guide/installing-solr.html>`_

Solr_ is a popular search engine based on Lucene, just like Elasticsearch_.  But
instead of searching in indices, you can search in collections.

Example
=======

This is an example configuration for searching in the collection
``my-collection`` and getting the results in ascending order.

.. code:: yaml

  - name: solr
    engine: solr
    shortcut: slr
    base_url: http://localhost:8983
    collection: my-collection
    sort: asc
    enable_http: true

"""

# pylint: disable=global-statement

from json import loads
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException


# Engine settings.  ``init`` and ``request`` below read these module-level
# names.

# Root URL of the Solr server; ``init`` appends ``/solr/<collection>/select``.
base_url = 'http://localhost:8983'
# Name of the collection to search; ``init`` raises ValueError when it is ''.
collection = ''
# Sent as the ``rows`` query parameter and used to compute the ``start``
# offset of the requested page.
rows = 10
sort = ''  # sorting: asc or desc; sent as ``sort`` when not empty
field_list = 'name'  # list of field names to display on the UI; sent as ``fl``
default_fields = ''  # default field to query; sent as ``df`` when not empty
query_fields = ''  # query fields; sent as ``qf`` when not empty
# URL template with a ``{params}`` placeholder; assigned by ``init``.
_search_url = ''
# NOTE(review): not referenced in this module -- presumably read by the engine
# loader to enable paging; confirm.
paging = True


def init(_):
    """Validate the engine settings and build the Solr select URL template.

    Reads the module-level ``base_url`` and ``collection`` settings and stores
    the resulting URL template in the module-level ``_search_url``.  The
    template keeps a literal ``{params}`` placeholder that ``request`` fills
    with the URL-encoded query string.

    The single positional argument is ignored.

    :raises ValueError: if ``collection`` is empty or unset.
    """
    global _search_url

    # ``not collection`` also rejects ``None`` (e.g. an empty value in the
    # settings), which would otherwise fail below with an opaque TypeError.
    if not collection:
        raise ValueError('collection cannot be empty')

    # Drop trailing slashes so that a ``base_url`` such as
    # ``http://localhost:8983/`` does not yield ``//solr/...`` in the URL.
    _search_url = base_url.rstrip('/') + '/solr/' + collection + '/select?{params}'


def request(query, params):
    """Store the Solr select URL for ``query`` in ``params['url']``.

    The query string always carries ``q`` and ``rows``.  The settings
    ``field_list``, ``query_fields``, ``default_fields`` and ``sort`` are added
    as ``fl``, ``qf``, ``df`` and ``sort`` (in that order) when they are not
    the empty string.  If ``params`` has a ``pageno`` entry, ``start`` is added
    as well.

    :param query: search terms, sent as the ``q`` parameter.
    :param params: request parameters; modified in place.
    :returns: the same ``params`` object.
    """
    optional_params = (
        ('fl', field_list),
        ('qf', query_fields),
        ('df', default_fields),
        ('sort', sort),
    )

    query_params = {'q': query, 'rows': rows}
    query_params.update((name, value) for name, value in optional_params if value != '')

    if 'pageno' in params:
        # Offset of the first row of the requested page: page 1 -> start=0.
        previous_pages = params['pageno'] - 1
        query_params['start'] = rows * previous_pages

    encoded = urlencode(query_params)
    params['url'] = _search_url.format(params=encoded)
    return params


def response(resp):
    """Convert the documents of a Solr reply into key-value results.

    Every entry of ``response.docs`` in the decoded reply becomes one result
    dict whose values are converted with ``str``; documents without any field
    are dropped.  Each result is tagged with the ``key-value.html`` template.

    :param resp: HTTP response, decoded by ``__get_response``.
    :returns: list of result dicts.
    """
    docs = __get_response(resp)['response']['docs']

    # Stringify lazily; empty documents are filtered out in the list below.
    stringified = ({field: str(content) for field, content in doc.items()} for doc in docs)

    return [{**fields, 'template': 'key-value.html'} for fields in stringified if fields]


def __get_response(resp):
    """Decode the JSON body of ``resp`` and surface errors reported in it.

    :param resp: object whose ``text`` attribute holds the JSON reply.
    :returns: the decoded JSON value.
    :raises SearxEngineAPIException: if the body cannot be parsed, or if the
        decoded reply contains an ``error`` entry (its ``msg`` becomes the
        exception message).
    """
    try:
        payload = loads(resp.text)
    except Exception as exc:
        raise SearxEngineAPIException("failed to parse response") from exc

    if 'error' not in payload:
        return payload

    raise SearxEngineAPIException(payload['error']['msg'])