diff options
author | Bnyro <bnyro@tutanota.com> | 2024-05-05 23:17:35 +0200 |
---|---|---|
committer | Markus Heiser <markus.heiser@darmarIT.de> | 2024-06-07 14:42:52 +0200 |
commit | aa59bfbf60d75508fc1f91220ed2598bf8cf97ec (patch) | |
tree | 8b975458c723a86b4b38d5882320fa8c0283dcdf /searx | |
parent | 3bec04079c027b952dee95dab194f29ea12e12a5 (diff) | |
download | searxng-aa59bfbf60d75508fc1f91220ed2598bf8cf97ec.tar.gz searxng-aa59bfbf60d75508fc1f91220ed2598bf8cf97ec.zip |
[feat] hostname replace plugin: support for external list file
Diffstat (limited to 'searx')
-rw-r--r-- | searx/plugins/hostnames.py | 36 | ||||
-rw-r--r-- | searx/settings.yml | 10 | ||||
-rw-r--r-- | searx/settings_loader.py | 8 |
3 files changed, 45 insertions, 9 deletions
```diff
diff --git a/searx/plugins/hostnames.py b/searx/plugins/hostnames.py
index 515a45259..6ab6147dd 100644
--- a/searx/plugins/hostnames.py
+++ b/searx/plugins/hostnames.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-# pylint: disable=missing-module-docstring
+# pylint: disable=missing-module-docstring, too-many-branches
 
 import re
 from urllib.parse import urlunparse, urlparse
@@ -8,6 +8,7 @@ from flask_babel import gettext
 
 from searx import settings
 from searx.plugins import logger
+from searx.settings_loader import get_yaml_file
 
 name = gettext('Hostnames plugin')
 description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
@@ -16,19 +17,36 @@ preference_section = 'general'
 
 plugin_id = 'hostnames'
 
-replacements = {
-    re.compile(p): r
-    for (p, r) in (settings.get(plugin_id, {}).get('replace', settings.get('hostname_replace', {})).items())
-}
-removables = {re.compile(p) for p in settings[plugin_id].get('remove', [])}
-high_priority = {re.compile(p) for p in settings[plugin_id].get('high_priority', [])}
-low_priority = {re.compile(p) for p in settings[plugin_id].get('low_priority', [])}
-
 logger = logger.getChild(plugin_id)
 parsed = 'parsed_url'
 _url_fields = ['iframe_src', 'audio_src']
 
 
+def _load_regular_expressions(settings_key):
+    setting_value = settings.get(plugin_id, {}).get(settings_key)
+
+    if not setting_value:
+        return {}
+
+    # load external file with configuration
+    if isinstance(setting_value, str):
+        setting_value = get_yaml_file(setting_value)
+
+    if isinstance(setting_value, list):
+        return {re.compile(r) for r in setting_value}
+
+    if isinstance(setting_value, dict):
+        return {re.compile(p): r for (p, r) in setting_value.items()}
+
+    return {}
+
+
+replacements = _load_regular_expressions('replace')
+removables = _load_regular_expressions('remove')
+high_priority = _load_regular_expressions('high_priority')
+low_priority = _load_regular_expressions('low_priority')
+
+
 def _matches_parsed_url(result, pattern):
     return parsed in result and pattern.search(result[parsed].netloc)
 
diff --git a/searx/settings.yml b/searx/settings.yml
index 8a1c00ba8..db749be77 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -243,6 +243,16 @@ outgoing:
 #     - '(.*\.)?google(\..*)?$'
 #   high_priority:
 #     - '(.*\.)?wikipedia.org$'
+#
+# Alternatively you can use external files for configuring the "Hostnames plugin":
+#
+# hostnames:
+#   replace: 'rewrite-hosts.yml'
+#
+# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'):
+# '(.*\.)?youtube\.com$': 'invidious.example.com'
+# '(.*\.)?youtu\.be$': 'invidious.example.com'
+#
 
 checker:
   # disable checker when in debug mode
diff --git a/searx/settings_loader.py b/searx/settings_loader.py
index fe2696cce..6bf3465f0 100644
--- a/searx/settings_loader.py
+++ b/searx/settings_loader.py
@@ -31,6 +31,14 @@ def load_yaml(file_name):
         raise SearxSettingsException(e, file_name) from e
 
 
+def get_yaml_file(file_name):
+    path = existing_filename_or_none(join(searx_dir, file_name))
+    if path is None:
+        raise FileNotFoundError(f"File {file_name} does not exist!")
+
+    return load_yaml(path)
+
+
 def get_default_settings_path():
     return existing_filename_or_none(join(searx_dir, 'settings.yml'))
 
```