author | Bnyro <bnyro@tutanota.com> | 2024-05-05 20:43:45 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-06-07 14:42:52 +0200 |
commit | 3bec04079c027b952dee95dab194f29ea12e12a5 (patch) | |
tree | 42fa4444622987a19e96a129b7efcb9b9d496595 /searx/plugins | |
parent | d4c3d309952b3a50e553808d1c367ec5ca23d62c (diff) | |
download | searxng-3bec04079c027b952dee95dab194f29ea12e12a5.tar.gz searxng-3bec04079c027b952dee95dab194f29ea12e12a5.zip |
[feat] hostname replace plugin: possibility to prioritize certain websites
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx/plugins')
-rw-r--r-- | searx/plugins/hostname_replace.py | 49
-rw-r--r-- | searx/plugins/hostnames.py | 73 |
2 files changed, 73 insertions, 49 deletions
diff --git a/searx/plugins/hostname_replace.py b/searx/plugins/hostname_replace.py
deleted file mode 100644
index 1b3f8609c..000000000
--- a/searx/plugins/hostname_replace.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring
-
-import re
-from urllib.parse import urlunparse, urlparse
-
-from flask_babel import gettext
-
-from searx import settings
-from searx.plugins import logger
-
-name = gettext('Hostname replace')
-description = gettext('Rewrite result hostnames or remove results based on the hostname')
-default_on = False
-preference_section = 'general'
-
-plugin_id = 'hostname_replace'
-
-replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {}
-
-logger = logger.getChild(plugin_id)
-parsed = 'parsed_url'
-_url_fields = ['iframe_src', 'audio_src']
-
-
-def on_result(_request, _search, result):
-
-    for pattern, replacement in replacements.items():
-
-        if parsed in result:
-            if pattern.search(result[parsed].netloc):
-                # to keep or remove this result from the result list depends
-                # (only) on the 'parsed_url'
-                if not replacement:
-                    return False
-                result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
-                result['url'] = urlunparse(result[parsed])
-
-        for url_field in _url_fields:
-            if result.get(url_field):
-                url_src = urlparse(result[url_field])
-                if pattern.search(url_src.netloc):
-                    if not replacement:
-                        del result[url_field]
-                    else:
-                        url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
-                        result[url_field] = urlunparse(url_src)
-
-    return True
diff --git a/searx/plugins/hostnames.py b/searx/plugins/hostnames.py
new file mode 100644
index 000000000..515a45259
--- /dev/null
+++ b/searx/plugins/hostnames.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# pylint: disable=missing-module-docstring
+
+import re
+from urllib.parse import urlunparse, urlparse
+
+from flask_babel import gettext
+
+from searx import settings
+from searx.plugins import logger
+
+name = gettext('Hostnames plugin')
+description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
+default_on = False
+preference_section = 'general'
+
+plugin_id = 'hostnames'
+
+replacements = {
+    re.compile(p): r
+    for (p, r) in (settings.get(plugin_id, {}).get('replace', settings.get('hostname_replace', {})).items())
+}
+removables = {re.compile(p) for p in settings[plugin_id].get('remove', [])}
+high_priority = {re.compile(p) for p in settings[plugin_id].get('high_priority', [])}
+low_priority = {re.compile(p) for p in settings[plugin_id].get('low_priority', [])}
+
+logger = logger.getChild(plugin_id)
+parsed = 'parsed_url'
+_url_fields = ['iframe_src', 'audio_src']
+
+
+def _matches_parsed_url(result, pattern):
+    return parsed in result and pattern.search(result[parsed].netloc)
+
+
+def on_result(_request, _search, result):
+    for pattern, replacement in replacements.items():
+        if _matches_parsed_url(result, pattern):
+            logger.debug(result['url'])
+            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
+            result['url'] = urlunparse(result[parsed])
+            logger.debug(result['url'])
+
+        for url_field in _url_fields:
+            if not result.get(url_field):
+                continue
+
+            url_src = urlparse(result[url_field])
+            if pattern.search(url_src.netloc):
+                url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
+                result[url_field] = urlunparse(url_src)
+
+    for pattern in removables:
+        if _matches_parsed_url(result, pattern):
+            return False
+
+        for url_field in _url_fields:
+            if not result.get(url_field):
+                continue
+
+            url_src = urlparse(result[url_field])
+            if pattern.search(url_src.netloc):
+                del result[url_field]
+
+    for pattern in low_priority:
+        if _matches_parsed_url(result, pattern):
+            result['priority'] = 'low'
+
+    for pattern in high_priority:
+        if _matches_parsed_url(result, pattern):
+            result['priority'] = 'high'
+
+    return True
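
The renamed plugin reads its rules from a `hostnames` section of the settings, with `replace` falling back to the legacy `hostname_replace` section. Below is a minimal settings.yml sketch (not part of this commit) assuming the YAML keys mirror the `replace`, `remove`, `high_priority` and `low_priority` options read by the code above; all patterns and target hostnames are placeholders, and since `default_on` is `False` the plugin presumably still has to be listed under `enabled_plugins` by its `name`.

```yaml
# Hypothetical settings.yml fragment -- patterns and hostnames are placeholders.
enabled_plugins:
  - 'Hostnames plugin'              # name defined in searx/plugins/hostnames.py

hostnames:
  replace:                          # regex on the netloc -> replacement hostname
    '(.*\.)?youtube\.com$': 'invidious.example.org'
  remove:                           # drop results whose parsed_url matches
    - '(.*\.)?example\.net$'
  high_priority:                    # sets result['priority'] = 'high'
    - '(.*\.)?wikipedia\.org$'
  low_priority:                     # sets result['priority'] = 'low'
    - '(.*\.)?example\.com$'
```

Each pattern is compiled with `re.compile` and matched against the result's netloc (`parsed_url.netloc`, plus `iframe_src`/`audio_src` for replace and remove rules), so expressions should target the hostname rather than the full URL.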