summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexandre Flament <alex@al-f.net> 2020-10-26 20:40:24 +0100
committer Alexandre Flament <alex@al-f.net> 2020-10-27 20:00:04 +0100
commit 5e7060053cc382723af5daa1b4af42fe228b5292 (patch)
tree df9a53dc57d1e389a08f6f1212ec1b50d6e21e42
parent db703a0283ee169381aeea97c678e666ae508348 (diff)
download searxng-5e7060053cc382723af5daa1b4af42fe228b5292.tar.gz
searxng-5e7060053cc382723af5daa1b4af42fe228b5292.zip
[mod] ahmia_filter.py: minor changes
- use result['parsed_url']
- load ahmia_blacklist.txt in searx.data
-rw-r--r-- searx/data/__init__.py 7
-rw-r--r-- searx/plugins/ahmia_filter.py 11
2 files changed, 10 insertions, 8 deletions
diff --git a/searx/data/__init__.py b/searx/data/__init__.py
index 391947bff..1116e5d47 100644
--- a/searx/data/__init__.py
+++ b/searx/data/__init__.py
@@ -2,7 +2,7 @@ import json
from pathlib import Path
-__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
+__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
data_dir = Path(__file__).parent
@@ -16,6 +16,11 @@ def bangs_loader():
return load('bangs.json')
+def ahmia_blacklist_loader():
+ with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd:
+ return fd.read().split()
+
+
ENGINES_LANGUAGES = load('engines_languages.json')
CURRENCIES = load('currencies.json')
USER_AGENTS = load('useragents.json')
diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py
index 8eb7f9413..83b05e4d2 100644
--- a/searx/plugins/ahmia_filter.py
+++ b/searx/plugins/ahmia_filter.py
@@ -3,9 +3,7 @@
'''
from hashlib import md5
-from os.path import join
-from urllib.parse import urlparse
-from searx import searx_dir
+from searx.data import ahmia_blacklist_loader
name = "Ahmia blacklist"
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
@@ -18,15 +16,14 @@ ahmia_blacklist = None
def get_ahmia_blacklist():
global ahmia_blacklist
if not ahmia_blacklist:
- with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
- ahmia_blacklist = f.read().split()
+ ahmia_blacklist = ahmia_blacklist_loader()
return ahmia_blacklist
def not_blacklisted(result):
- if not result.get('is_onion'):
+ if not result.get('is_onion') or not result.get('parsed_url'):
return True
- result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+ result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
return result_hash not in get_ahmia_blacklist()