diff options
author | Alexandre Flament <alex@al-f.net> | 2020-10-26 20:40:24 +0100 |
---|---|---|
committer | Alexandre Flament <alex@al-f.net> | 2020-10-27 20:00:04 +0100 |
commit | 5e7060053cc382723af5daa1b4af42fe228b5292 (patch) | |
tree | df9a53dc57d1e389a08f6f1212ec1b50d6e21e42 /searx | |
parent | db703a0283ee169381aeea97c678e666ae508348 (diff) | |
download | searxng-5e7060053cc382723af5daa1b4af42fe228b5292.tar.gz searxng-5e7060053cc382723af5daa1b4af42fe228b5292.zip |
[mod] ahmia_filter.py: minor changes
- use result['parsed_url']
- load ahmia_blacklist.txt in searx.data
Diffstat (limited to 'searx')
-rw-r--r-- | searx/data/__init__.py | 7 | ||||
-rw-r--r-- | searx/plugins/ahmia_filter.py | 11 |
2 files changed, 10 insertions, 8 deletions
diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 391947bff..1116e5d47 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -2,7 +2,7 @@ import json from pathlib import Path -__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader'] +__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader'] data_dir = Path(__file__).parent @@ -16,6 +16,11 @@ def bangs_loader(): return load('bangs.json') +def ahmia_blacklist_loader(): + with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd: + return fd.read().split() + + ENGINES_LANGUAGES = load('engines_languages.json') CURRENCIES = load('currencies.json') USER_AGENTS = load('useragents.json') diff --git a/searx/plugins/ahmia_filter.py b/searx/plugins/ahmia_filter.py index 8eb7f9413..83b05e4d2 100644 --- a/searx/plugins/ahmia_filter.py +++ b/searx/plugins/ahmia_filter.py @@ -3,9 +3,7 @@ ''' from hashlib import md5 -from os.path import join -from urllib.parse import urlparse -from searx import searx_dir +from searx.data import ahmia_blacklist_loader name = "Ahmia blacklist" description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)" @@ -18,15 +16,14 @@ ahmia_blacklist = None def get_ahmia_blacklist(): global ahmia_blacklist if not ahmia_blacklist: - with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f: - ahmia_blacklist = f.read().split() + ahmia_blacklist = ahmia_blacklist_loader() return ahmia_blacklist def not_blacklisted(result): - if not result.get('is_onion'): + if not result.get('is_onion') or not result.get('parsed_url'): return True - result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest() + result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest() return result_hash not in get_ahmia_blacklist() |