summaryrefslogtreecommitdiff
path: root/searx/plugins/hostname_replace.py
blob: 51bd003b112b846c0a111152e6e68feaaee691a0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# SPDX-License-Identifier: AGPL-3.0-or-later

import re
from urllib.parse import urlunparse, urlparse
from searx import settings
from searx.plugins import logger
from flask_babel import gettext

# Plugin metadata.
name = gettext('Hostname replace')
description = gettext('Rewrite result hostnames or remove results based on the hostname')
default_on = False
preference_section = 'general'

# Identifier of this plugin; it is also the key under which the plugin's
# hostname rules are looked up in the settings.
plugin_id = 'hostname_replace'

# Key of the already parsed URL inside a result, and the result fields that
# hold additional (unparsed) URLs whose hostnames are rewritten as well.
parsed = 'parsed_url'
_url_fields = ['data_src', 'audio_src']

# Mapping of compiled hostname pattern -> replacement.  on_result() treats a
# falsy replacement as "remove" instead of "rewrite".  Without a settings
# section for this plugin there are no rules at all.
if plugin_id in settings:
    replacements = {
        re.compile(regex): substitute for regex, substitute in settings[plugin_id].items()
    }
else:
    replacements = {}

# Plugin specific child logger (rebinds the imported plugins logger).
logger = logger.getChild(plugin_id)


def on_result(request, search, result):
    """Rewrite the hostnames of a result or drop the result entirely.

    Every ``(pattern, replacement)`` pair of ``replacements`` is applied in
    order, so a hostname rewritten by one rule is seen in its rewritten form
    by the following rules.

    - If the result has a parsed URL whose ``netloc`` matches the pattern, a
      falsy replacement removes the whole result; otherwise the hostname is
      substituted and ``result['url']`` is rebuilt from the parsed URL.
    - For each field listed in ``_url_fields`` that holds a non-empty URL
      whose ``netloc`` matches, a falsy replacement deletes just that field;
      otherwise the hostname inside the field is substituted.

    A URL field that ``urlparse`` cannot parse is left untouched instead of
    letting the ``ValueError`` abort the processing of the result.

    :param request: not used by this plugin.
    :param search: not used by this plugin.
    :param result: the result dict; it is modified in place.
    :returns: ``False`` if the result has to be removed from the result
        list, ``True`` otherwise.
    """
    for pattern, replacement in replacements.items():

        if parsed in result and pattern.search(result[parsed].netloc):
            # to keep or remove this result from the result list depends
            # (only) on the 'parsed_url'
            if not replacement:
                return False
            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
            result['url'] = urlunparse(result[parsed])

        for url_field in _url_fields:
            if not result.get(url_field):
                continue

            try:
                url_src = urlparse(result[url_field])
            except ValueError:
                # urlparse() rejects some malformed URLs (e.g. unbalanced
                # IPv6 brackets).  These values come from remote engines, a
                # broken extra URL must not break the whole result: there is
                # no hostname to match, so the field is kept as it is.
                continue

            if not pattern.search(url_src.netloc):
                continue

            if not replacement:
                del result[url_field]
            else:
                url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
                result[url_field] = urlunparse(url_src)

    return True