summary | refs | log | tree | commit | diff
path: root/searx/plugins
diff options
context:
space:
mode:
author Marc Abonce Seguin <marc-abonce@mailbox.org> 2020-10-18 23:55:57 -0700
committer Marc Abonce Seguin <marc-abonce@mailbox.org> 2020-10-25 17:59:43 -0700
commit 32957cdf49c306a5f50ca78bb50c0978ffe5c072 (patch)
tree fdf159379029c16a0fe3ed25ecd7963aa61bfd69 /searx/plugins
parent c3daa08537668c24224fffecbed4347fee936fcf (diff)
download searxng-32957cdf49c306a5f50ca78bb50c0978ffe5c072.tar.gz
searxng-32957cdf49c306a5f50ca78bb50c0978ffe5c072.zip
add Ahmia filter plugin for onion results
Diffstat (limited to 'searx/plugins')
-rw-r--r-- searx/plugins/__init__.py 5
-rw-r--r-- searx/plugins/ahmia_filter.py 36
2 files changed, 41 insertions, 0 deletions
diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py
index b6dc4875b..8221f7c1d 100644
--- a/searx/plugins/__init__.py
+++ b/searx/plugins/__init__.py
@@ -28,6 +28,7 @@ from searx import logger, settings, static_path
logger = logger.getChild('plugins')
from searx.plugins import (oa_doi_rewrite,
+ ahmia_filter,
hash_plugin,
https_rewrite,
infinite_scroll,
@@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
plugin.default_on = True
else:
plugin.default_on = False
+
+# Load Tor-specific plugins: the Ahmia filter is registered only when the 'using_tor_proxy' entry of settings['outgoing'] is truthy.
+if settings['outgoing'].get('using_tor_proxy'):
+ plugins.register(ahmia_filter)
diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py
new file mode 100644
index 000000000..8eb7f9413
--- /dev/null
+++ b/searx/plugins/ahmia_filter.py
@@ -0,0 +1,36 @@
+'''
+ SPDX-License-Identifier: AGPL-3.0-or-later
+'''
+
+from hashlib import md5
+from os.path import join
+from urllib.parse import urlparse
+from searx import searx_dir
+
+name = "Ahmia blacklist"  # plugin name; presumably shown in the preferences UI -- confirm against the plugin loader
+description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"  # human-readable summary of the plugin
+default_on = True  # NOTE(review): looks like the plugin is enabled unless a user turns it off -- confirm against the plugin loader
+preference_section = 'onions'  # presumably the preferences section this plugin is grouped under -- confirm
+
+ahmia_blacklist = None  # module-level cache of the blacklist entries; None until get_ahmia_blacklist() first loads the file
+
+
+def get_ahmia_blacklist():
+    """Return the Ahmia blacklist, reading it from disk on first use.
+
+    The entries are the whitespace-separated tokens found in
+    ``<searx_dir>/data/ahmia_blacklist.txt``. The loaded collection is
+    cached in the module-level ``ahmia_blacklist`` so the file is read at
+    most once per process.
+
+    Returns:
+        A ``frozenset`` of blacklist entries (strings). A set is used
+        instead of a list because callers only test membership, and a
+        hash lookup avoids scanning the whole blacklist for every result.
+
+    Raises:
+        OSError: if the blacklist file cannot be opened or read.
+    """
+    global ahmia_blacklist
+    # Compare against None instead of relying on truthiness: an empty
+    # blacklist file is a valid (empty) result and must not cause the file
+    # to be re-read on every call.
+    if ahmia_blacklist is None:
+        blacklist_path = join(searx_dir, "data", "ahmia_blacklist.txt")
+        # Explicit encoding so the read does not depend on the locale.
+        with open(blacklist_path, 'r', encoding='utf-8') as f:
+            ahmia_blacklist = frozenset(f.read().split())
+    return ahmia_blacklist
+
+
+def not_blacklisted(result):
+    """Tell whether a search result should be kept.
+
+    Args:
+        result: mapping describing one search result. Only the ``is_onion``
+            and ``url`` keys are read.
+
+    Returns:
+        ``True`` when the result is not flagged as an onion result, when its
+        URL has no usable hostname, or when the MD5 hex digest of its
+        hostname is absent from the Ahmia blacklist; ``False`` when the
+        digest is blacklisted.
+    """
+    if not result.get('is_onion'):
+        return True
+    try:
+        # ``or ''`` keeps urlparse on its str code path when 'url' is
+        # missing or None.
+        hostname = urlparse(result.get('url') or '').hostname
+    except ValueError:
+        # urlparse rejects a malformed netloc (e.g. unmatched brackets).
+        hostname = None
+    if hostname is None:
+        # Without a hostname there is nothing to hash, so the result cannot
+        # match a blacklist entry; keep it instead of raising
+        # AttributeError and aborting the whole filter pass.
+        return True
+    result_hash = md5(hostname.encode()).hexdigest()
+    return result_hash not in get_ahmia_blacklist()
+
+
+def post_search(request, search):
+    """Remove blacklisted onion results from the merged search results.
+
+    Replaces ``search.result_container._merged_results`` with a new list
+    holding only the entries for which ``not_blacklisted`` returns true.
+
+    Args:
+        request: not used by this hook.
+        search: object whose ``result_container._merged_results`` is
+            filtered.
+
+    Returns:
+        Always ``True``.
+    """
+    container = search.result_container
+    container._merged_results = [
+        entry for entry in container._merged_results if not_blacklisted(entry)
+    ]
+    return True