diff options
author | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2020-10-18 23:55:57 -0700 |
---|---|---|
committer | Marc Abonce Seguin <marc-abonce@mailbox.org> | 2020-10-25 17:59:43 -0700 |
commit | 32957cdf49c306a5f50ca78bb50c0978ffe5c072 (patch) | |
tree | fdf159379029c16a0fe3ed25ecd7963aa61bfd69 /searx/plugins | |
parent | c3daa08537668c24224fffecbed4347fee936fcf (diff) | |
download | searxng-32957cdf49c306a5f50ca78bb50c0978ffe5c072.tar.gz searxng-32957cdf49c306a5f50ca78bb50c0978ffe5c072.zip |
add Ahmia filter plugin for onion results
Diffstat (limited to 'searx/plugins')
-rw-r--r-- | searx/plugins/__init__.py | 5 | ||||
-rw-r--r-- | searx/plugins/ahmia_filter.py | 36 |
2 files changed, 41 insertions, 0 deletions
```diff
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
index b6dc4875b..8221f7c1d 100644
--- a/searx/plugins/__init__.py
+++ b/searx/plugins/__init__.py
@@ -28,6 +28,7 @@ from searx import logger, settings, static_path
 logger = logger.getChild('plugins')
 
 from searx.plugins import (oa_doi_rewrite,
+                           ahmia_filter,
                            hash_plugin,
                            https_rewrite,
                            infinite_scroll,
@@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
             plugin.default_on = True
         else:
             plugin.default_on = False
+
+# load tor specific plugins
+if settings['outgoing'].get('using_tor_proxy'):
+    plugins.register(ahmia_filter)
diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py
new file mode 100644
index 000000000..8eb7f9413
--- /dev/null
+++ b/searx/plugins/ahmia_filter.py
@@ -0,0 +1,36 @@
+'''
+ SPDX-License-Identifier: AGPL-3.0-or-later
+'''
+
+from hashlib import md5
+from os.path import join
+from urllib.parse import urlparse
+from searx import searx_dir
+
+name = "Ahmia blacklist"
+description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
+default_on = True
+preference_section = 'onions'
+
+ahmia_blacklist = None
+
+
+def get_ahmia_blacklist():
+    global ahmia_blacklist
+    if not ahmia_blacklist:
+        with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
+            ahmia_blacklist = f.read().split()
+    return ahmia_blacklist
+
+
+def not_blacklisted(result):
+    if not result.get('is_onion'):
+        return True
+    result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+    return result_hash not in get_ahmia_blacklist()
+
+
+def post_search(request, search):
+    filtered_results = list(filter(not_blacklisted, search.result_container._merged_results))
+    search.result_container._merged_results = filtered_results
+    return True
```