summaryrefslogtreecommitdiff
path: root/searx/search/processors/online_url_search.py
blob: a1dd6a018cdf171a4ff1cfe78c81a6fa5d702656 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Processors for engine-type: ``online_url_search``

"""

import re
from .online import OnlineProcessor

# Compiled patterns used to pick URL-like tokens out of a search query.  The
# key names the kind of URL; each pattern matches from the scheme up to (but
# not including) the next space character.
re_search_urls = {
    kind: re.compile(pattern)
    for kind, pattern in (
        ('http', r'https?:\/\/[^ ]*'),
        ('ftp', r'ftps?:\/\/[^ ]*'),
        ('data:image', 'data:image/[^; ]*;base64,[^ ]*'),
    )
}


class OnlineUrlSearchProcessor(OnlineProcessor):
    """Processor for engines of type ``online_url_search``.

    Extends the online processor's request params with the URLs found in the
    search query.
    """

    engine_type = 'online_url_search'

    def get_params(self, search_query, engine_category):
        """Build the :ref:`request params <engine request online>` for a query.

        Returns ``None`` when the parent processor yields no params, or when
        the query matches none of the patterns in :py:obj:`re_search_urls`.

        Otherwise the parent's params are returned with an additional
        ``search_urls`` entry: a dict holding one item per key of
        :py:obj:`re_search_urls`, whose value is the first matching text in the
        query or ``None`` if that pattern did not match.
        """

        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        query = search_query.query
        hits = {kind: pattern.search(query) for kind, pattern in re_search_urls.items()}

        # A match object is always truthy, so this asks "did any pattern match?".
        if not any(hits.values()):
            return None

        params['search_urls'] = {kind: hit[0] if hit else None for kind, hit in hits.items()}
        return params