diff options
32 files changed, 2075 insertions, 429 deletions
@@ -37,6 +37,7 @@ exclude = .*,__pycache__,resources.py # (numpy-style) # D413: Missing blank line after last section (not in pep257?) # A003: Builtin name for class attribute (needed for overridden methods) +# W503: line break before binary operator # W504: line break after binary operator # FI15: __future__ import "generator_stop" missing ignore = @@ -47,7 +48,7 @@ ignore = P101,P102,P103, D102,D103,D106,D107,D104,D105,D209,D211,D401,D402,D403,D412,D413, A003, - W504, + W503, W504 FI15 min-version = 3.6.0 max-complexity = 12 @@ -46,6 +46,7 @@ disable=locally-disabled, too-many-statements, too-few-public-methods, import-outside-toplevel, + bad-continuation # This lint disagrees with Black [BASIC] function-rgx=[a-z_][a-z0-9_]{2,50}$ diff --git a/README.asciidoc b/README.asciidoc index bb033e6bd..42013368c 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -102,6 +102,7 @@ The following software and libraries are required to run qutebrowser: The following libraries are optional: +* https://pypi.org/project/adblock/[adblock] (for improved adblocking using ABP syntax) * On Windows, https://pypi.python.org/pypi/colorama/[colorama] for colored log output. * http://asciidoc.org/[asciidoc] to generate the documentation for the `:help` diff --git a/doc/help/commands.asciidoc b/doc/help/commands.asciidoc index 5dc38e631..eb8e4925d 100644 --- a/doc/help/commands.asciidoc +++ b/doc/help/commands.asciidoc @@ -29,7 +29,7 @@ possible to run or bind multiple commands by separating them with `;;`. [options="header",width="75%",cols="25%,75%"] |============== |Command|Description -|<<adblock-update,adblock-update>>|Update the adblock block lists. +|<<adblock-update,adblock-update>>|Update block lists for both the host- and the Brave ad blocker. |<<back,back>>|Go back in the history of the current tab. |<<bind,bind>>|Bind a key to a command. |<<bookmark-add,bookmark-add>>|Save the current page as a bookmark, or a specific url. 
@@ -139,9 +139,7 @@ possible to run or bind multiple commands by separating them with `;;`. |============== [[adblock-update]] === adblock-update -Update the adblock block lists. - -This updates `~/.local/share/qutebrowser/blocked-hosts` with downloaded host lists and re-reads `~/.config/qutebrowser/blocked-hosts`. +Update block lists for both the host- and the Brave ad blocker. [[back]] === back diff --git a/doc/help/settings.asciidoc b/doc/help/settings.asciidoc index 482bbbaf2..19b4757b1 100644 --- a/doc/help/settings.asciidoc +++ b/doc/help/settings.asciidoc @@ -137,6 +137,11 @@ |<<completion.web_history.max_items,completion.web_history.max_items>>|Number of URLs to show in the web history. |<<confirm_quit,confirm_quit>>|Require a confirmation before quitting the application. |<<content.autoplay,content.autoplay>>|Automatically start playing `<video>` elements. +|<<content.blocking.adblock.lists,content.blocking.adblock.lists>>|List of URLs to ABP-style adblocking rulesets. +|<<content.blocking.enabled,content.blocking.enabled>>|Enable the ad blocker +|<<content.blocking.hosts.lists,content.blocking.hosts.lists>>|List of URLs of lists which contain hosts to block. +|<<content.blocking.method,content.blocking.method>>|Which method of blocking ads should be used. +|<<content.blocking.whitelist,content.blocking.whitelist>>|A list of patterns that should always be loaded, despite being ad-blocked. Local domains are always exempt from adblocking. |<<content.cache.appcache,content.cache.appcache>>|Enable support for the HTML 5 web application cache feature. |<<content.cache.maximum_pages,content.cache.maximum_pages>>|Maximum number of pages to hold in the global memory page cache. |<<content.cache.size,content.cache.size>>|Size (in bytes) of the HTTP network cache. Null to use the default value. @@ -155,9 +160,6 @@ |<<content.headers.do_not_track,content.headers.do_not_track>>|Value to send in the `DNT` header. 
|<<content.headers.referer,content.headers.referer>>|When to send the Referer header. |<<content.headers.user_agent,content.headers.user_agent>>|User agent to send. -|<<content.host_blocking.enabled,content.host_blocking.enabled>>|Enable host blocking. -|<<content.host_blocking.lists,content.host_blocking.lists>>|List of URLs of lists which contain hosts to block. -|<<content.host_blocking.whitelist,content.host_blocking.whitelist>>|A list of patterns that should always be loaded, despite being ad-blocked. |<<content.hyperlink_auditing,content.hyperlink_auditing>>|Enable hyperlink auditing (`<a ping>`). |<<content.images,content.images>>|Load images automatically in web pages. |<<content.javascript.alert,content.javascript.alert>>|Show javascript alerts. @@ -1903,6 +1905,80 @@ Default: +pass:[true]+ This setting is only available with the QtWebEngine backend. +[[content.blocking.adblock.lists]] +=== content.blocking.adblock.lists +List of URLs to ABP-style adblocking rulesets. + + +Type: <<types,List of Url>> + +Default: + +- +pass:[https://easylist.to/easylist/easylist.txt]+ +- +pass:[https://easylist.to/easylist/easyprivacy.txt]+ + +[[content.blocking.enabled]] +=== content.blocking.enabled +Enable the ad blocker + +This setting supports URL patterns. + +Type: <<types,Bool>> + +Default: +pass:[true]+ + +[[content.blocking.hosts.lists]] +=== content.blocking.hosts.lists +List of URLs of lists which contain hosts to block. + +The file can be in one of the following formats: + +- An `/etc/hosts`-like file +- One host per line +- A zip-file of any of the above, with either only one file, or a file + named `hosts` (with any extension). + +It's also possible to add a local file or directory via a `file://` URL. In +case of a directory, all files in the directory are read as adblock lists. + +The file `~/.config/qutebrowser/blocked-hosts` is always read if it exists. 
+ + +Type: <<types,List of Url>> + +Default: + +- +pass:[https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts]+ + +[[content.blocking.method]] +=== content.blocking.method +Which method of blocking ads should be used. + +Support for Adblock Plus syntax blocklists requires the `adblock` Python +package to be installed, which is an optional dependency of qutebrowser. +It is required when either `adblock` or `both` are selected. + + +Type: <<types,String>> + +Valid values: + + * +auto+: Use Brave's ABP-style adblocker if available, host blocking otherwise + * +adblock+: Use Brave's ABP-style adblocker + * +hosts+: Use hosts blocking + * +both+: Use both hosts blocking and Brave's ABP-style adblocker + +Default: +pass:[auto]+ + +[[content.blocking.whitelist]] +=== content.blocking.whitelist +A list of patterns that should always be loaded, despite being ad-blocked. Local domains are always exempt from adblocking. +Note this whitelists otherwise blocked requests, not first-party URLs. As an example, if `example.org` loads an ad from `ads.example.org`, the whitelist entry could be `https://ads.example.org/*`. If you want to disable the adblocker on a given page, use the `content.blocking.enabled` setting with a URL pattern instead. + +Type: <<types,List of UrlPattern>> + +Default: empty + [[content.cache.appcache]] === content.cache.appcache Enable support for the HTML 5 web application cache feature. @@ -2141,49 +2217,6 @@ Type: <<types,FormatString>> Default: +pass:[Mozilla/5.0 ({os_info}) AppleWebKit/{webkit_version} (KHTML, like Gecko) {qt_key}/{qt_version} {upstream_browser_key}/{upstream_browser_version} Safari/{webkit_version}]+ -[[content.host_blocking.enabled]] -=== content.host_blocking.enabled -Enable host blocking. - -This setting supports URL patterns. - -Type: <<types,Bool>> - -Default: +pass:[true]+ - -[[content.host_blocking.lists]] -=== content.host_blocking.lists -List of URLs of lists which contain hosts to block. 
- -The file can be in one of the following formats: - -- An `/etc/hosts`-like file -- One host per line -- A zip-file of any of the above, with either only one file, or a file - named `hosts` (with any extension). - -It's also possible to add a local file or directory via a `file://` URL. In -case of a directory, all files in the directory are read as adblock lists. - -The file `~/.config/qutebrowser/blocked-hosts` is always read if it exists. - - -Type: <<types,List of Url>> - -Default: - -- +pass:[https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts]+ - -[[content.host_blocking.whitelist]] -=== content.host_blocking.whitelist -A list of patterns that should always be loaded, despite being ad-blocked. -Note this whitelists blocked hosts, not first-party URLs. As an example, if `example.org` loads an ad from `ads.example.org`, the whitelisted host should be `ads.example.org`. If you want to disable the adblocker on a given page, use the `content.host_blocking.enabled` setting with a URL pattern instead. -Local domains are always exempt from hostblocking. - -Type: <<types,List of UrlPattern>> - -Default: empty - [[content.hyperlink_auditing]] === content.hyperlink_auditing Enable hyperlink auditing (`<a ping>`). diff --git a/misc/requirements/requirements-qutebrowser.txt-raw b/misc/requirements/requirements-qutebrowser.txt-raw index a01e56712..2d527aeef 100644 --- a/misc/requirements/requirements-qutebrowser.txt-raw +++ b/misc/requirements/requirements-qutebrowser.txt-raw @@ -4,6 +4,7 @@ pyPEG2 PyYAML colorama attrs +adblock # Optional, for improved adblocking importlib-resources #@ markers: importlib-resources python_version<"3.9" diff --git a/qutebrowser/components/adblock.py b/qutebrowser/components/adblock.py index f9b0a583b..daf254cac 100644 --- a/qutebrowser/components/adblock.py +++ b/qutebrowser/components/adblock.py @@ -17,24 +17,31 @@ # You should have received a copy of the GNU General Public License # along with qutebrowser. 
If not, see <http://www.gnu.org/licenses/>. -"""Functions related to ad blocking.""" +"""Functions related to host blocking.""" import os.path -import functools import posixpath import zipfile import logging import pathlib -from typing import cast, IO, List, Set +from typing import cast, IO, Set from PyQt5.QtCore import QUrl -from qutebrowser.api import (cmdutils, hook, config, message, downloads, - interceptor, apitypes, qtutils) +from qutebrowser.api import ( + hook, + config, + message, + interceptor, + apitypes, + qtutils, +) +from qutebrowser.components.utils import blockutils +from qutebrowser.components import braveadblock -logger = logging.getLogger('network') -_host_blocker = cast('HostBlocker', None) +logger = logging.getLogger("network") +host_blocker = cast("HostBlocker", None) def _guess_zip_filename(zf: zipfile.ZipFile) -> str: @@ -44,7 +51,7 @@ def _guess_zip_filename(zf: zipfile.ZipFile) -> str: return files[0] else: for e in files: - if posixpath.splitext(e)[0].lower() == 'hosts': + if posixpath.splitext(e)[0].lower() == "hosts": return e raise FileNotFoundError("No hosts file found in zip") @@ -56,28 +63,19 @@ def get_fileobj(byte_io: IO[bytes]) -> IO[bytes]: byte_io.seek(0) # rewind what zipfile.is_zipfile did zf = zipfile.ZipFile(byte_io) filename = _guess_zip_filename(zf) - byte_io = zf.open(filename, mode='r') + byte_io = zf.open(filename, mode="r") else: byte_io.seek(0) # rewind what zipfile.is_zipfile did return byte_io -def _is_whitelisted_url(url: QUrl) -> bool: - """Check if the given URL is on the adblock whitelist.""" - for pattern in config.val.content.host_blocking.whitelist: - if pattern.matches(url): - return True - return False - - -class _FakeDownload(downloads.TempDownload): - - """A download stub to use on_download_finished with local files.""" - - def __init__(self, # pylint: disable=super-init-not-called - fileobj: IO[bytes]) -> None: - self.fileobj = fileobj - self.successful = True +def _should_be_used() -> bool: + 
"""Whether the hostblocker should be used or not.""" + method = config.val.content.blocking.method + adblock_dependency_satisfied = braveadblock.ad_blocker is None + return method in ("both", "hosts") or ( + method == "auto" and not adblock_dependency_satisfied + ) class HostBlocker: @@ -85,51 +83,57 @@ class HostBlocker: """Manage blocked hosts based from /etc/hosts-like files. Attributes: + enabled: Given the current blocking method, should the host blocker be enabled? _blocked_hosts: A set of blocked hosts. _config_blocked_hosts: A set of blocked hosts from ~/.config. - _in_progress: The DownloadItems which are currently downloading. - _done_count: How many files have been read successfully. _local_hosts_file: The path to the blocked-hosts file. _config_hosts_file: The path to a blocked-hosts in ~/.config _has_basedir: Whether a custom --basedir is set. """ - def __init__(self, *, data_dir: pathlib.Path, config_dir: pathlib.Path, - has_basedir: bool = False) -> None: + def __init__( + self, + *, + data_dir: pathlib.Path, + config_dir: pathlib.Path, + has_basedir: bool = False + ) -> None: + self.enabled = _should_be_used() self._has_basedir = has_basedir self._blocked_hosts: Set[str] = set() self._config_blocked_hosts: Set[str] = set() - self._in_progress: List[downloads.TempDownload] = [] - self._done_count = 0 - self._local_hosts_file = str(data_dir / 'blocked-hosts') + self._local_hosts_file = str(data_dir / "blocked-hosts") self.update_files() - self._config_hosts_file = str(config_dir / 'blocked-hosts') + self._config_hosts_file = str(config_dir / "blocked-hosts") - def _is_blocked(self, request_url: QUrl, - first_party_url: QUrl = None) -> bool: + def _is_blocked(self, request_url: QUrl, first_party_url: QUrl = None) -> bool: """Check whether the given request is blocked.""" + if not self.enabled: + return False + if first_party_url is not None and not first_party_url.isValid(): first_party_url = None qtutils.ensure_valid(request_url) - if not 
config.get('content.host_blocking.enabled', - url=first_party_url): + if not config.get("content.blocking.enabled", url=first_party_url): return False host = request_url.host() - return ((host in self._blocked_hosts or - host in self._config_blocked_hosts) and - not _is_whitelisted_url(request_url)) + return ( + host in self._blocked_hosts or host in self._config_blocked_hosts + ) and not blockutils.is_whitelisted_url(request_url) def filter_request(self, info: interceptor.Request) -> None: """Block the given request if necessary.""" - if self._is_blocked(request_url=info.request_url, - first_party_url=info.first_party_url): - logger.debug("Request to {} blocked by host blocker." - .format(info.request_url.host())) + if self._is_blocked( + request_url=info.request_url, first_party_url=info.first_party_url + ): + logger.debug( + "Request to {} blocked by host blocker.".format(info.request_url.host()) + ) info.block() def _read_hosts_line(self, raw_line: bytes) -> Set[str]: @@ -142,15 +146,15 @@ class HostBlocker: A set containing valid hosts found in the line. """ - if raw_line.startswith(b'#'): + if raw_line.startswith(b"#"): # Ignoring comments early so we don't have to care about # encoding errors in them return set() - line = raw_line.decode('utf-8') + line = raw_line.decode("utf-8") # Remove comments - hash_idx = line.find('#') + hash_idx = line.find("#") line = line if hash_idx == -1 else line[:hash_idx] parts = line.strip().split() @@ -163,9 +167,7 @@ class HostBlocker: filtered_hosts = set() for host in hosts: - if ('.' in host and - not host.endswith('.localdomain') and - host != '0.0.0.0'): + if "." 
in host and not host.endswith(".localdomain") and host != "0.0.0.0": filtered_hosts.update([host]) return filtered_hosts @@ -184,7 +186,7 @@ class HostBlocker: return False try: - with open(filename, 'rb') as f: + with open(filename, "rb") as f: for line in f: target |= self._read_hosts_line(line) @@ -197,54 +199,30 @@ class HostBlocker: """Read hosts from the existing blocked-hosts file.""" self._blocked_hosts = set() - self._read_hosts_file(self._config_hosts_file, - self._config_blocked_hosts) + self._read_hosts_file(self._config_hosts_file, self._config_blocked_hosts) - found = self._read_hosts_file(self._local_hosts_file, - self._blocked_hosts) + found = self._read_hosts_file(self._local_hosts_file, self._blocked_hosts) if not found: - if (config.val.content.host_blocking.lists and - not self._has_basedir and - config.val.content.host_blocking.enabled): + if ( + config.val.content.blocking.hosts.lists + and not self._has_basedir + and config.val.content.blocking.enabled + and self.enabled + ): message.info("Run :adblock-update to get adblock lists.") - def adblock_update(self) -> None: + def adblock_update(self) -> blockutils.BlocklistDownloads: """Update the adblock block lists.""" - self._read_hosts_file(self._config_hosts_file, - self._config_blocked_hosts) + self._read_hosts_file(self._config_hosts_file, self._config_blocked_hosts) self._blocked_hosts = set() - self._done_count = 0 - for url in config.val.content.host_blocking.lists: - if url.scheme() == 'file': - filename = url.toLocalFile() - if os.path.isdir(filename): - for entry in os.scandir(filename): - if entry.is_file(): - self._import_local(entry.path) - else: - self._import_local(filename) - else: - download = downloads.download_temp(url) - self._in_progress.append(download) - download.finished.connect( - functools.partial(self._on_download_finished, download)) - - def _import_local(self, filename: str) -> None: - """Adds the contents of a file to the blocklist. 
- Args: - filename: path to a local file to import. - """ - try: - fileobj = open(filename, 'rb') - except OSError as e: - message.error("adblock: Error while reading {}: {}".format( - filename, e.strerror)) - return - download = _FakeDownload(fileobj) - self._in_progress.append(download) - self._on_download_finished(download) + blocklists = config.val.content.blocking.hosts.lists + dl = blockutils.BlocklistDownloads(blocklists) + dl.single_download_finished.connect(self._merge_file) + dl.all_downloads_finished.connect(self._on_lists_downloaded) + dl.initiate() + return dl def _merge_file(self, byte_io: IO[bytes]) -> None: """Read and merge host files. @@ -256,10 +234,12 @@ class HostBlocker: line_count = 0 try: f = get_fileobj(byte_io) - except (OSError, zipfile.BadZipFile, zipfile.LargeZipFile, - LookupError) as e: - message.error("adblock: Error while reading {}: {} - {}".format( - byte_io.name, e.__class__.__name__, e)) + except (OSError, zipfile.BadZipFile, zipfile.LargeZipFile, LookupError) as e: + message.error( + "hostblock: Error while reading {}: {} - {}".format( + byte_io.name, e.__class__.__name__, e + ) + ) return for line in f: @@ -272,20 +252,27 @@ class HostBlocker: logger.debug("{}: read {} lines".format(byte_io.name, line_count)) if error_count > 0: - message.error("adblock: {} read errors for {}".format( - error_count, byte_io.name)) + message.error( + "hostblock: {} read errors for {}".format(error_count, byte_io.name) + ) - def _on_lists_downloaded(self) -> None: + def _on_lists_downloaded(self, done_count: int) -> None: """Install block lists after files have been downloaded.""" - with open(self._local_hosts_file, 'w', encoding='utf-8') as f: - for host in sorted(self._blocked_hosts): - f.write(host + '\n') - message.info("adblock: Read {} hosts from {} sources.".format( - len(self._blocked_hosts), self._done_count)) + try: + with open(self._local_hosts_file, "w", encoding="utf-8") as f: + for host in sorted(self._blocked_hosts): + 
f.write(host + "\n") + message.info( + "hostblock: Read {} hosts from {} sources.".format( + len(self._blocked_hosts), done_count + ) + ) + except OSError: + logger.exception("Failed to write host block list!") def update_files(self) -> None: """Update files when the config changed.""" - if not config.val.content.host_blocking.lists: + if not config.val.content.blocking.hosts.lists: try: os.remove(self._local_hosts_file) except FileNotFoundError: @@ -293,52 +280,25 @@ class HostBlocker: except OSError as e: logger.exception("Failed to delete hosts file: {}".format(e)) - def _on_download_finished(self, download: downloads.TempDownload) -> None: - """Check if all downloads are finished and if so, trigger reading. - Arguments: - download: The finished download. - """ - self._in_progress.remove(download) - if download.successful: - self._done_count += 1 - assert not isinstance(download.fileobj, - downloads.UnsupportedAttribute) - assert download.fileobj is not None - try: - self._merge_file(download.fileobj) - finally: - download.fileobj.close() - if not self._in_progress: - try: - self._on_lists_downloaded() - except OSError: - logger.exception("Failed to write host block list!") - - -@cmdutils.register() -def adblock_update() -> None: - """Update the adblock block lists. - - This updates `~/.local/share/qutebrowser/blocked-hosts` with downloaded - host lists and re-reads `~/.config/qutebrowser/blocked-hosts`. - """ - # FIXME: As soon as we can register instances again, we should move this - # back to the class. 
- _host_blocker.adblock_update() +@hook.config_changed("content.blocking.hosts.lists") +def on_lists_changed() -> None: + host_blocker.update_files() -@hook.config_changed('content.host_blocking.lists') -def on_config_changed() -> None: - _host_blocker.update_files() +@hook.config_changed("content.blocking.method") +def on_method_changed() -> None: + host_blocker.enabled = _should_be_used() @hook.init() def init(context: apitypes.InitContext) -> None: """Initialize the host blocker.""" - global _host_blocker - _host_blocker = HostBlocker(data_dir=context.data_dir, - config_dir=context.config_dir, - has_basedir=context.args.basedir is not None) - _host_blocker.read_hosts() - interceptor.register(_host_blocker.filter_request) + global host_blocker + host_blocker = HostBlocker( + data_dir=context.data_dir, + config_dir=context.config_dir, + has_basedir=context.args.basedir is not None, + ) + host_blocker.read_hosts() + interceptor.register(host_blocker.filter_request) diff --git a/qutebrowser/components/adblockcommands.py b/qutebrowser/components/adblockcommands.py new file mode 100644 index 000000000..4574650d3 --- /dev/null +++ b/qutebrowser/components/adblockcommands.py @@ -0,0 +1,31 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org> +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. 
If not, see <http://www.gnu.org/licenses/>. + +"""Commands relating to ad blocking.""" + +from qutebrowser.api import cmdutils +from qutebrowser.components import braveadblock, adblock + + +@cmdutils.register() +def adblock_update() -> None: + """Update block lists for both the host- and the Brave ad blocker.""" + if braveadblock.ad_blocker is not None: + braveadblock.ad_blocker.adblock_update() + adblock.host_blocker.adblock_update() diff --git a/qutebrowser/components/braveadblock.py b/qutebrowser/components/braveadblock.py new file mode 100644 index 000000000..f2435d649 --- /dev/null +++ b/qutebrowser/components/braveadblock.py @@ -0,0 +1,298 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org> +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. 
+ +"""Functions related to the Brave adblocker.""" + +import io +import logging +import pathlib +import functools +from typing import Optional, IO + +from PyQt5.QtCore import QUrl + +from qutebrowser.api import ( + hook, + config, + message, + interceptor, + apitypes, + qtutils, +) +from qutebrowser.api.interceptor import ResourceType +from qutebrowser.components.utils import blockutils +from qutebrowser.utils import version + +try: + import adblock +except ImportError: + adblock = None # type: ignore[assignment] + +# If the `adblock` library version is outdated, this variable is not None and +# contains its version. +_outdated_version: Optional[str] = None +logger = logging.getLogger("network") +ad_blocker: Optional["BraveAdBlocker"] = None + + +def _should_be_used() -> bool: + """Whether the Brave adblocker should be used or not. + + Here we assume the adblock dependency is satisfied. + """ + return config.val.content.blocking.method in ("auto", "both", "adblock") + + +def _possibly_show_missing_dependency_warning() -> None: + """Show missing dependency warning, if appropriate. + + If the adblocking method is configured such that the Brave adblocker + should be used, but the optional dependency is not satisfied, we show an + error message. + """ + method = config.val.content.blocking.method + if method not in ("both", "adblock"): + return + + if _outdated_version is not None: + message.warning( + f"Installed version {_outdated_version} of the 'adblock' dependency is too " + f"old. Minimum supported is {version.MODULE_INFO['adblock'].min_version}." + ) + else: + message.warning( + f"Ad blocking method is set to '{method}' but 'adblock' dependency is not " + "installed." 
+ ) + + +_RESOURCE_TYPE_STRINGS = { + ResourceType.main_frame: "main_frame", + ResourceType.sub_frame: "sub_frame", + ResourceType.stylesheet: "stylesheet", + ResourceType.script: "script", + ResourceType.image: "image", + ResourceType.font_resource: "font", + ResourceType.sub_resource: "sub_frame", + ResourceType.object: "object", + ResourceType.media: "media", + ResourceType.worker: "other", + ResourceType.shared_worker: "other", + ResourceType.prefetch: "other", + ResourceType.favicon: "image", + ResourceType.xhr: "xhr", + ResourceType.ping: "ping", + ResourceType.service_worker: "other", + ResourceType.csp_report: "csp_report", + ResourceType.plugin_resource: "other", + ResourceType.preload_main_frame: "other", + ResourceType.preload_sub_frame: "other", + ResourceType.unknown: "other", + None: "", +} + + +def resource_type_to_string(resource_type: Optional[ResourceType]) -> str: + return _RESOURCE_TYPE_STRINGS.get(resource_type, "other") + + +class BraveAdBlocker: + + """Manage blocked hosts based on Brave's adblocker. + + Attributes: + enabled: Whether to block ads or not. + _has_basedir: Whether a custom --basedir is set. + _cache_path: The path of the adblock engine cache file + _engine: Brave ad-blocking engine. + """ + + def __init__(self, *, data_dir: pathlib.Path, has_basedir: bool = False) -> None: + self.enabled = _should_be_used() + self._has_basedir = has_basedir + self._cache_path = data_dir / "adblock-cache.dat" + self._engine = adblock.Engine(adblock.FilterSet()) + + def _is_blocked( + self, + request_url: QUrl, + first_party_url: Optional[QUrl] = None, + resource_type: Optional[interceptor.ResourceType] = None, + ) -> bool: + """Check whether the given request is blocked.""" + if not self.enabled: + # Do nothing if `content.blocking.method` is not set to enable the + # use of this adblocking module. 
+ return False + + if first_party_url is None or not first_party_url.isValid(): + # FIXME: It seems that when `first_party_url` is None, every URL + # I try is blocked. This may have been a result of me incorrectly + # using the upstream library, or an upstream bug. For now we don't + # block any request with `first_party_url=None`. + return False + + qtutils.ensure_valid(request_url) + + if not config.get("content.blocking.enabled", url=first_party_url): + # Do nothing if adblocking is disabled for this site. + return False + + result = self._engine.check_network_urls( + request_url.toString(), + first_party_url.toString(), + resource_type_to_string(resource_type), + ) + + if not result.matched: + return False + elif result.exception is not None and not result.important: + # Exception is not `None` when the blocker matched on an exception + # rule. Effectively this means that there was a match, but the + # request should not be blocked. + # + # An `important` match means that exceptions should not apply and + # no further checking is necessary--the request should be blocked. 
+ logger.debug( + "Excepting %s from being blocked by %s because of %s", + request_url.toDisplayString(), + result.filter, + result.exception, + ) + return False + elif blockutils.is_whitelisted_url(request_url): + logger.debug( + "Request to %s is whitelisted, thus not blocked", + request_url.toDisplayString(), + ) + return False + return True + + def filter_request(self, info: interceptor.Request) -> None: + """Block the given request if necessary.""" + if self._is_blocked(info.request_url, info.first_party_url, info.resource_type): + logger.debug( + "Request to %s blocked by ad blocker.", + info.request_url.toDisplayString(), + ) + info.block() + + def read_cache(self) -> None: + """Initialize the adblocking engine from cache file.""" + if self._cache_path.is_file(): + logger.debug("Loading cached adblock data: %s", self._cache_path) + self._engine.deserialize_from_file(str(self._cache_path)) + else: + if ( + config.val.content.blocking.adblock.lists + and not self._has_basedir + and config.val.content.blocking.enabled + and self.enabled + ): + message.info("Run :adblock-update to get adblock lists.") + + def adblock_update(self) -> blockutils.BlocklistDownloads: + """Update the adblock block lists.""" + logger.info("Downloading adblock filter lists...") + + filter_set = adblock.FilterSet() + dl = blockutils.BlocklistDownloads(config.val.content.blocking.adblock.lists) + dl.single_download_finished.connect( + functools.partial(self._on_download_finished, filter_set=filter_set) + ) + dl.all_downloads_finished.connect( + functools.partial(self._on_lists_downloaded, filter_set=filter_set) + ) + dl.initiate() + return dl + + def _on_lists_downloaded( + self, done_count: int, filter_set: "adblock.FilterSet" + ) -> None: + """Install block lists after files have been downloaded.""" + self._engine = adblock.Engine(filter_set) + self._engine.serialize_to_file(str(self._cache_path)) + logger.info( + "braveadblock: Filters successfully read from %s sources", done_count + 
@hook.init()
def init(context: apitypes.InitContext) -> None:
    """Create the Brave ad blocker, or leave it disabled.

    If the optional 'adblock' module is missing or outdated, the
    `ad_blocker` global is set to `None`, the found version is remembered in
    `_outdated_version` and the missing-dependency warning may be shown.
    Otherwise a `BraveAdBlocker` is created, its cache is read and its
    request filter is registered with the interceptor.
    """
    global ad_blocker, _outdated_version

    module_info = version.MODULE_INFO["adblock"]
    if adblock is None or module_info.is_outdated():  # type: ignore[unreachable]
        # 'adblock' is an optional dependency, so a missing or outdated
        # module just means there is no Brave ad blocker.
        ad_blocker = None
        _outdated_version = module_info.get_version()
        _possibly_show_missing_dependency_warning()
        return

    data_dir = context.data_dir
    has_basedir = context.args.basedir is not None
    ad_blocker = BraveAdBlocker(data_dir=data_dir, has_basedir=has_basedir)
    ad_blocker.read_cache()
    interceptor.register(ad_blocker.filter_request)
class BlocklistDownloads(QObject):
    """Download blocklists from the given URLs.

    Attributes:
        single_download_finished:
            A signal that is emitted when a single download has finished
            successfully. The listening slot is provided with the download's
            file object, which is closed once the emit call returns.
        all_downloads_finished:
            A signal that is emitted when all downloads have finished. The
            first argument is the number of items downloaded.
        _urls: The URLs to download.
        _in_progress: The DownloadItems which are currently downloading.
        _done_count: How many files have been read successfully.
        _finished_registering_downloads:
            Used to make sure that if all the downloads finish really quickly,
            before all of the block-lists have been added to the download
            queue, we don't emit `all_downloads_finished` too early.
        _started: Has the `initiate` method been called?
        _finished: Has `all_downloads_finished` been emitted?
    """

    single_download_finished = pyqtSignal(object)  # arg: the file object
    all_downloads_finished = pyqtSignal(int)  # arg: download count

    def __init__(self, urls: List[QUrl], parent: Optional[QObject] = None) -> None:
        super().__init__(parent)
        self._urls = urls

        self._in_progress: List[downloads.TempDownload] = []
        self._done_count = 0
        self._finished_registering_downloads = False
        self._started = False
        self._finished = False

    def initiate(self) -> None:
        """Initiate downloads of each url in `self._urls`.

        Raises:
            ValueError: If this method has already been called.
        """
        if self._started:
            raise ValueError("This download has already been initiated")
        self._started = True

        if not self._urls:
            # Nothing to download, so we are done right away.
            self._finished = True
            self.all_downloads_finished.emit(self._done_count)
            return

        for url in self._urls:
            self._download_blocklist_url(url)
        self._finished_registering_downloads = True

        if not self._in_progress and not self._finished:
            # The in-progress list is empty but we still haven't called the
            # completion callback yet. This happens when all downloads finish
            # before we've set `_finished_registering_downloads` to True.
            self._finished = True
            self.all_downloads_finished.emit(self._done_count)

    def _download_blocklist_url(self, url: QUrl) -> None:
        """Take a blocklist url and queue it for download.

        Args:
            url: url to download
        """
        if url.scheme() == "file":
            # The URL describes a local file on disk if the url scheme is
            # "file://". We handle those as a special case.
            filename = url.toLocalFile()
            if os.path.isdir(filename):
                # The context manager releases the directory handle even if
                # importing one of the entries raises.
                with os.scandir(filename) as entries:
                    for entry in entries:
                        if entry.is_file():
                            self._import_local(entry.path)
            else:
                self._import_local(filename)
        else:
            download = downloads.download_temp(url)
            self._in_progress.append(download)
            download.finished.connect(
                functools.partial(self._on_download_finished, download)
            )

    def _import_local(self, filename: str) -> None:
        """Pretend that a local file was downloaded from the internet.

        Args:
            filename: path to a local file to import.
        """
        try:
            fileobj = open(filename, "rb")
        except OSError as e:
            message.error(
                "blockutils: Error while reading {}: {}".format(filename, e.strerror)
            )
            return
        # The file object is closed by _on_download_finished.
        download = FakeDownload(fileobj)
        self._in_progress.append(download)
        self._on_download_finished(download)

    def _on_download_finished(self, download: downloads.TempDownload) -> None:
        """Check if all downloads are finished and if so, trigger callback.

        Arguments:
            download: The finished download.
        """
        self._in_progress.remove(download)
        if download.successful:
            self._done_count += 1
            assert not isinstance(download.fileobj, downloads.UnsupportedAttribute)
            assert download.fileobj is not None
            try:
                # Call the user-provided callback
                self.single_download_finished.emit(download.fileobj)
            finally:
                download.fileobj.close()
        if not self._in_progress and self._finished_registering_downloads:
            self._finished = True
            self.all_downloads_finished.emit(self._done_count)
+ desc: Enable the ad blocker -content.host_blocking.lists: +content.blocking.hosts.lists: default: - "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts" type: @@ -619,7 +628,35 @@ content.host_blocking.lists: The file `~/.config/qutebrowser/blocked-hosts` is always read if it exists. -content.host_blocking.whitelist: +content.blocking.method: + default: auto + type: + name: String + valid_values: + - auto: "Use Brave's ABP-style adblocker if available, host blocking + otherwise" + - adblock: Use Brave's ABP-style adblocker + - hosts: Use hosts blocking + - both: Use both hosts blocking and Brave's ABP-style adblocker + desc: | + Which method of blocking ads should be used. + + Support for Adblock Plus syntax blocklists requires the `adblock` Python + package to be installed, which is an optional dependency of qutebrowser. + It is required when either `adblock` or `both` are selected. + +content.blocking.adblock.lists: + default: + - "https://easylist.to/easylist/easylist.txt" + - "https://easylist.to/easylist/easyprivacy.txt" + type: + name: List + valtype: Url + none_ok: true + desc: | + List of URLs to ABP-style adblocking rulesets. + +content.blocking.whitelist: default: [] type: name: List @@ -627,14 +664,13 @@ content.host_blocking.whitelist: none_ok: true desc: >- A list of patterns that should always be loaded, despite being ad-blocked. + Local domains are always exempt from adblocking. - Note this whitelists blocked hosts, not first-party URLs. As an example, if - `example.org` loads an ad from `ads.example.org`, the whitelisted host - should be `ads.example.org`. If you want to disable the adblocker on a - given page, use the `content.host_blocking.enabled` setting with a URL - pattern instead. - - Local domains are always exempt from hostblocking. + Note this whitelists otherwise blocked requests, not first-party URLs. 
class ModuleInfo:

    """Class to query version information of qutebrowser dependencies.

    Attributes:
        name: Name of the module as it is imported.
        _version_attributes:
            Sequence of attribute names belonging to the module which may hold
            version information.
        min_version: Minimum version of this module which qutebrowser can use.
        _installed: Is the module installed? Determined at runtime.
        _version: Version of the module. Determined at runtime.
        _initialized:
            Set to `True` once an import of the module has been attempted and
            `self._installed` / `self._version` have been set accordingly.
    """

    def __init__(
            self,
            name: str,
            version_attributes: Sequence[str],
            min_version: Optional[str] = None
    ) -> None:
        self.name = name
        self._version_attributes = version_attributes
        self.min_version = min_version
        self._installed = False
        self._version: Optional[str] = None
        self._initialized = False

    def _reset_cache(self) -> None:
        """Reset the version cache.

        It is necessary to call this method in unit tests that mock a module's
        version number.
        """
        self._installed = False
        self._version = None
        self._initialized = False

    def _initialize_info(self) -> None:
        """Import module and set `self._installed` and `self._version`.

        The result is cached even when the import fails, so that a missing
        module is not imported again on every query.
        """
        # Mark as initialized up front so that every exit path is cached.
        self._initialized = True

        try:
            module = importlib.import_module(self.name)
        except (ImportError, ValueError):
            self._installed = False
            return

        self._installed = True

        # The first version attribute that exists on the module wins.
        for attribute_name in self._version_attributes:
            if hasattr(module, attribute_name):
                version = getattr(module, attribute_name)
                assert isinstance(version, (str, float))
                self._version = str(version)
                break

    @staticmethod
    def _version_key(version: str) -> Tuple[int, ...]:
        """Turn a dotted version string into a tuple of ints.

        Parsing stops after the first component which is not purely numeric,
        keeping its leading digits if there are any: "1.2.3rc1" gives
        (1, 2, 3) and "dev" gives ().
        """
        key = []
        for component in version.split('.'):
            digits = ''
            for char in component:
                if char not in '0123456789':
                    break
                digits += char
            if digits:
                key.append(int(digits))
            if len(digits) != len(component):
                break
        return tuple(key)

    def get_version(self) -> Optional[str]:
        """Finds the module version if it exists."""
        if not self._initialized:
            self._initialize_info()
        return self._version

    def is_installed(self) -> bool:
        """Checks whether the module is installed."""
        if not self._initialized:
            self._initialize_info()
        return self._installed

    def is_outdated(self) -> Optional[bool]:
        """Checks whether the module is outdated.

        Versions are compared numerically, component by component, so that
        e.g. "0.10.0" is newer than "0.3.2". If either version has no leading
        numeric component, plain string comparison is used instead.

        Return:
            A boolean when the version and minimum version are both defined.
            Otherwise `None`.
        """
        version = self.get_version()
        if (
                not self.is_installed()
                or version is None
                or self.min_version is None
        ):
            return None

        installed_key = self._version_key(version)
        minimum_key = self._version_key(self.min_version)
        if not installed_key or not minimum_key:
            return version < self.min_version

        # Pad with zeros so that "1.0" and "1.0.0" compare as equal.
        width = max(len(installed_key), len(minimum_key))
        installed_key += (0,) * (width - len(installed_key))
        minimum_key += (0,) * (width - len(minimum_key))
        return installed_key < minimum_key

    def __str__(self) -> str:
        if not self.is_installed():
            return f'{self.name}: no'

        version = self.get_version()
        if version is None:
            return f'{self.name}: yes'

        text = f'{self.name}: {version}'
        if self.is_outdated():
            text += f" (< {self.min_version}, outdated)"
        return text
def _module_versions() -> Sequence[str]:
    """Describe every module registered in `MODULE_INFO`.

    Return:
        A list with one version info line per module, in registry order.
    """
    return list(map(str, MODULE_INFO.values()))
main(): configdata.init() - for url in configdata.DATA['content.host_blocking.lists'].default: + for url in configdata.DATA['content.blocking.hosts.lists'].default: print("checking {}...".format(url)) raw_file = urllib.request.urlopen(url) byte_io = io.BytesIO(raw_file.read()) diff --git a/tests/end2end/data/brave-adblock/LICENSE b/tests/end2end/data/brave-adblock/LICENSE new file mode 100644 index 000000000..6f03f190d --- /dev/null +++ b/tests/end2end/data/brave-adblock/LICENSE @@ -0,0 +1,318 @@ +Mozilla Public License, version 2.0 + +Copyright (c) 2019, Andrius Aucinas + +1. Definitions + + 1.1. “Contributor” means each individual or legal entity that + creates, contributes to the creation of, or owns Covered Software. + + 1.2. “Contributor Version” means the combination of the + Contributions of others (if any) used by a Contributor and that + particular Contributor’s Contribution. + + 1.3. “Contribution” means Covered Software of a particular + Contributor. + + 1.4. “Covered Software” means Source Code Form to which the initial + Contributor has attached the notice in Exhibit A, the Executable + Form of such Source Code Form, and Modifications of such Source Code + Form, in each case including portions thereof. + + 1.5. “Incompatible With Secondary Licenses” means + + that the initial Contributor has attached the notice described in + Exhibit B to the Covered Software; or + + that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the terms + of a Secondary License. + + 1.6. “Executable Form” means any form of the work other than Source + Code Form. + + 1.7. “Larger Work” means a work that combines Covered Software with + other material, in a separate file or files, that is not Covered + Software. + + 1.8. “License” means this document. + + 1.9. 
“Licensable” means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently, any and all of the rights conveyed by this License. + + 1.10. “Modifications” means any of the following: + + any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered Software; + or + + any new file in Source Code Form that contains any Covered Software. + + 1.11. “Patent Claims” of a Contributor means any patent claim(s), + including without limitation, method, process, and apparatus claims, + in any patent Licensable by such Contributor that would be + infringed, but for the grant of the License, by the making, using, + selling, offering for sale, having made, import, or transfer of + either its Contributions or its Contributor Version. + + 1.12. “Secondary License” means either the GNU General Public + License, Version 2.0, the GNU Lesser General Public License, Version + 2.1, the GNU Affero General Public License, Version 3.0, or any + later versions of those licenses. + + 1.13. “Source Code Form” means the form of the work preferred for + making modifications. + + 1.14. “You” (or “Your”) means an individual or a legal entity + exercising rights under this License. For legal entities, “You” + includes any entity that controls, is controlled by, or is under + common control with You. For purposes of this definition, “control” + means (a) the power, direct or indirect, to cause the direction or + management of such entity, whether by contract or otherwise, or (b) + ownership of more than fifty percent (50%) of the outstanding shares + or beneficial ownership of such entity. + + +2. License Grants and Conditions + + 2.1. 
Grants Each Contributor hereby grants You a world-wide, + royalty-free, non-exclusive license: + + under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + + under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + + 2.2. Effective Date The licenses granted in Section 2.1 with respect + to any Contribution become effective for each Contribution on the + date the Contributor first distributes such Contribution. + + 2.3. Limitations on Grant Scope The licenses granted in this Section + 2 are the only rights granted under this License. No additional + rights or licenses will be implied from the distribution or + licensing of Covered Software under this License. Notwithstanding + Section 2.1(b) above, no patent license is granted by a Contributor: + + for any code that a Contributor has removed from Covered Software; + or + + for infringements caused by: (i) Your and any other third party’s + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + + under Patent Claims infringed by Covered Software in the absence of + its Contributions. + + This License does not grant any rights in the trademarks, service + marks, or logos of any Contributor (except as may be necessary to + comply with the notice requirements in Section 3.4). + + 2.4. Subsequent Licenses No Contributor makes additional grants as a + result of Your choice to distribute the Covered Software under a + subsequent version of this License (see Section 10.2) or under the + terms of a Secondary License (if permitted under the terms of + Section 3.3). 
+ + 2.5. Representation Each Contributor represents that the Contributor + believes its Contributions are its original creation(s) or it has + sufficient rights to grant the rights to its Contributions conveyed + by this License. + + 2.6. Fair Use This License is not intended to limit any rights You + have under applicable copyright doctrines of fair use, fair dealing, + or other equivalents. + + 2.7. Conditions Sections 3.1, 3.2, 3.3, and 3.4 are conditions of + the licenses granted in Section 2.1. + + +3. Responsibilities + + 3.1. Distribution of Source Form All distribution of Covered + Software in Source Code Form, including any Modifications that You + create or to which You contribute, must be under the terms of this + License. You must inform recipients that the Source Code Form of the + Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter + or restrict the recipients’ rights in the Source Code Form. + + 3.2. Distribution of Executable Form If You distribute Covered + Software in Executable Form then: + + such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + + You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients’ rights in the Source Code Form under this License. + + 3.3. Distribution of a Larger Work You may create and distribute a + Larger Work under terms of Your choice, provided that You also + comply with the requirements of this License for the Covered + Software. 
If the Larger Work is a combination of Covered Software + with a work governed by one or more Secondary Licenses, and the + Covered Software is not Incompatible With Secondary Licenses, this + License permits You to additionally distribute such Covered Software + under the terms of such Secondary License(s), so that the recipient + of the Larger Work may, at their option, further distribute the + Covered Software under the terms of either this License or such + Secondary License(s). + + 3.4. Notices You may not remove or alter the substance of any + license notices (including copyright notices, patent notices, + disclaimers of warranty, or limitations of liability) contained + within the Source Code Form of the Covered Software, except that You + may alter any license notices to the extent required to remedy known + factual inaccuracies. + + 3.5. Application of Additional Terms You may choose to offer, and to + charge a fee for, warranty, support, indemnity or liability + obligations to one or more recipients of Covered Software. However, + You may do so only on Your own behalf, and not on behalf of any + Contributor. You must make it absolutely clear that any such + warranty, support, indemnity, or liability obligation is offered by + You alone, and You hereby agree to indemnify every Contributor for + any liability incurred by such Contributor as a result of warranty, + support, indemnity or liability terms You offer. You may include + additional disclaimers of warranty and limitations of liability + specific to any jurisdiction. + + +4. Inability to Comply Due to Statute or Regulation + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Software due to + statute, judicial order, or regulation then You must: (a) comply + with the terms of this License to the maximum extent possible; and + (b) describe the limitations and the code they affect. 
Such + description must be placed in a text file included with all + distributions of the Covered Software under this License. Except to + the extent prohibited by statute or regulation, such description + must be sufficiently detailed for a recipient of ordinary skill to + be able to understand it. + + +5. Termination + + 5.1. The rights granted under this License will terminate + automatically if You fail to comply with any of its terms. However, + if You become compliant, then the rights granted under this License + from a particular Contributor are reinstated (a) provisionally, + unless and until such Contributor explicitly and finally terminates + Your grants, and (b) on an ongoing basis, if such Contributor fails + to notify You of the non-compliance by some reasonable means prior + to 60 days after You have come back into compliance. Moreover, Your + grants from a particular Contributor are reinstated on an ongoing + basis if such Contributor notifies You of the non-compliance by some + reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You + become compliant prior to 30 days after Your receipt of the notice. + + 5.2. If You initiate litigation against any entity by asserting a + patent infringement claim (excluding declaratory judgment actions, + counter-claims, and cross-claims) alleging that a Contributor + Version directly or indirectly infringes any patent, then the rights + granted to You by any and all Contributors for the Covered Software + under Section 2.1 of this License shall terminate. + + 5.3. In the event of termination under Sections 5.1 or 5.2 above, + all end user license agreements (excluding distributors and + resellers) which have been validly granted by You or Your + distributors under this License prior to termination shall survive + termination. + +6. 
Disclaimer of Warranty Covered Software is provided under this +License on an “as is” basis, without warranty of any kind, either +expressed, implied, or statutory, including, without limitation, +warranties that the Covered Software is free of defects, merchantable, +fit for a particular purpose or non-infringing. The entire risk as to +the quality and performance of the Covered Software is with You. +Should any Covered Software prove defective in any respect, You (not +any Contributor) assume the cost of any necessary servicing, repair, +or correction. This disclaimer of warranty constitutes an essential +part of this License. No use of any Covered Software is authorized +under this License except under this disclaimer. + +7. Limitation of Liability Under no circumstances and under no legal +theory, whether tort (including negligence), contract, or otherwise, +shall any Contributor, or anyone who distributes Covered Software as +permitted above, be liable to You for any direct, indirect, special, +incidental, or consequential damages of any character including, +without limitation, damages for lost profits, loss of goodwill, work +stoppage, computer failure or malfunction, or any and all other +commercial damages or losses, even if such party shall have been +informed of the possibility of such damages. This limitation of +liability shall not apply to liability for death or personal injury +resulting from such party’s negligence to the extent applicable law +prohibits such limitation. Some jurisdictions do not allow the +exclusion or limitation of incidental or consequential damages, so +this exclusion and limitation may not apply to You. + +8. Litigation Any litigation relating to this License may be brought +only in the courts of a jurisdiction where the defendant maintains its +principal place of business and such litigation shall be governed by +laws of that jurisdiction, without reference to its conflict-of-law +provisions. 
Nothing in this Section shall prevent a party’s ability to +bring cross-claims or counter-claims. + +9. Miscellaneous This License represents the complete agreement +concerning the subject matter hereof. If any provision of this License +is held to be unenforceable, such provision shall be reformed only to +the extent necessary to make it enforceable. Any law or regulation +which provides that the language of a contract shall be construed +against the drafter shall not be used to construe this License against +a Contributor. + +10. Versions of the License + + 10.1. New Versions Mozilla Foundation is the license steward. Except + as provided in Section 10.3, no one other than the license steward + has the right to modify or publish new versions of this License. + Each version will be given a distinguishing version number. + + 10.2. Effect of New Versions You may distribute the Covered Software + under the terms of the version of the License under which You + originally received the Covered Software, or under the terms of any + subsequent version published by the license steward. + + 10.3. Modified Versions If you create software not governed by this + License, and you want to create a new license for such software, you + may create and use a modified version of this License if you rename + the license and remove any references to the name of the license + steward (except to note that such modified license differs from this + License). + + 10.4. Distributing Source Code Form that is Incompatible With + Secondary Licenses If You choose to distribute Source Code Form that + is Incompatible With Secondary Licenses under the terms of this + version of the License, the notice described in Exhibit B of this + License must be attached. + +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a + LICENSE file in a relevant directory) where a recipient would be + likely to look for such a notice. + + You may add additional accurate notices of copyright ownership. + +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible With Secondary Licenses”, as + defined by the Mozilla Public License, v. 2.0. diff --git a/tests/end2end/data/brave-adblock/README.md b/tests/end2end/data/brave-adblock/README.md new file mode 100644 index 000000000..0550e91a4 --- /dev/null +++ b/tests/end2end/data/brave-adblock/README.md @@ -0,0 +1,12 @@ +The `ublock-matches.tsv` file is [downloaded from][1] `adblock-rust`'s Github and preprocessed and compressed using `generate.py` to produce +`ublock-matches.tsv.gz`. + +## License + +The aforementioned file was released under terms of the Mozilla Public +License, version 2.0 (MPLv2) by Andrius Aucinas. A copy of the license may be +found in the [`LICENSE`][2] file of this directory, or on [Mozilla's website][3]. + +[1]: https://github.com/brave/adblock-rust/blob/master/data/ublock-matches.tsv +[2]: LICENSE +[3]: https://www.mozilla.org/en-US/MPL/2.0/ diff --git a/tests/end2end/data/brave-adblock/generate.py b/tests/end2end/data/brave-adblock/generate.py new file mode 100644 index 000000000..ae47c586b --- /dev/null +++ b/tests/end2end/data/brave-adblock/generate.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2020 Árni Dagur <arni@dagur.eu> +# +# This file is part of qutebrowser. 
+# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +import io +import gzip +import csv +import pathlib +import itertools +import urllib.request +import tempfile +from typing import Optional + +URL = "https://raw.githubusercontent.com/brave/adblock-rust/master/data/ublock-matches.tsv" +CACHE_PATH = pathlib.Path(tempfile.gettempdir(), "ublock-matches-cache.tsv") +ROWS_TO_USE = 30_000 + + +def type_rename(type_str: str) -> Optional[str]: + """Use the same resource type names as QtWebEngine.""" + if type_str == "other": + return "unknown" + if type_str == "xmlhttprequest": + return "xhr" + if type_str == "font": + return "font_resource" + if type_str in ["image", "stylesheet", "media", "script", "sub_frame"]: + return type_str + return None + + +def main(): + # Download file or use cached version + if CACHE_PATH.is_file(): + print(f"Using cached file {CACHE_PATH}") + data = io.StringIO(CACHE_PATH.read_text(encoding="utf-8")) + else: + request = urllib.request.Request(URL) + print(f"Downloading {URL} ...") + response = urllib.request.urlopen(request) + assert response.status == 200 + data_str = response.read().decode("utf-8") + print(f"Saving to cache file {CACHE_PATH} ...") + CACHE_PATH.write_text(data_str, encoding="utf-8") + data = io.StringIO(data_str) + + # We only want the first three columns and the first ROWS_TO_USE rows + print("Reading rows into memory...") + reader = 
csv.DictReader(data, delimiter="\t") + rows = list(itertools.islice(reader, ROWS_TO_USE)) + + print("Writing filtered file to memory...") + uncompressed_f = io.StringIO() + writer = csv.DictWriter( + uncompressed_f, ["url", "source_url", "type"], delimiter="\t" + ) + writer.writeheader() + for row in rows: + type_renamed = type_rename(row["type"]) + if type_renamed is None: + # Ignore request types we don't recognize + continue + writer.writerow( + { + "url": row["url"], + "source_url": row["sourceUrl"], + "type": type_renamed, + } + ) + uncompressed_f.seek(0) + + print("Compressing filtered file and saving to disk...") + # Compress the data before storing on the filesystem + with gzip.open("ublock-matches.tsv.gz", "wb", compresslevel=9) as gzip_f: + gzip_f.write(uncompressed_f.read().encode("utf-8")) + + +if __name__ == "__main__": + main() diff --git a/tests/end2end/data/brave-adblock/ublock-matches.tsv.gz b/tests/end2end/data/brave-adblock/ublock-matches.tsv.gz Binary files differ new file mode 100644 index 000000000..bced0da75 --- /dev/null +++ b/tests/end2end/data/brave-adblock/ublock-matches.tsv.gz diff --git a/tests/end2end/data/easylist.txt.gz b/tests/end2end/data/easylist.txt.gz Binary files differ new file mode 100644 index 000000000..b854af6f5 --- /dev/null +++ b/tests/end2end/data/easylist.txt.gz diff --git a/tests/end2end/data/easyprivacy.txt.gz b/tests/end2end/data/easyprivacy.txt.gz Binary files differ new file mode 100644 index 000000000..6ee5e2319 --- /dev/null +++ b/tests/end2end/data/easyprivacy.txt.gz diff --git a/tests/end2end/features/misc.feature b/tests/end2end/features/misc.feature index 06dc0b805..c5a74d081 100644 --- a/tests/end2end/features/misc.feature +++ b/tests/end2end/features/misc.feature @@ -528,7 +528,7 @@ Feature: Various utility commands. Scenario: Simple adblock update When I set up "simple" as block lists And I run :adblock-update - Then the message "adblock: Read 1 hosts from 1 sources." 
should be shown + Then the message "hostblock: Read 1 hosts from 1 sources." should be shown Scenario: Resource with invalid URL When I open data/invalid_resource.html diff --git a/tests/end2end/features/private.feature b/tests/end2end/features/private.feature index 2698555ab..e12259aa2 100644 --- a/tests/end2end/features/private.feature +++ b/tests/end2end/features/private.feature @@ -220,9 +220,10 @@ Feature: Using private browsing Scenario: Adblocking after reiniting private profile When I open about:blank in a private window And I run :close - And I set content.host_blocking.lists to ["http://localhost:(port)/data/adblock/qutebrowser"] + And I set content.blocking.hosts.lists to ["http://localhost:(port)/data/adblock/qutebrowser"] + And I set content.blocking.method to hosts And I run :adblock-update - And I wait for the message "adblock: Read 1 hosts from 1 sources." + And I wait for the message "hostblock: Read 1 hosts from 1 sources." And I open data/adblock/external_logo.html in a private window Then "Request to qutebrowser.org blocked by host blocker." 
should be logged diff --git a/tests/end2end/features/test_misc_bdd.py b/tests/end2end/features/test_misc_bdd.py index 8dcf7edd1..cab2ddaa7 100644 --- a/tests/end2end/features/test_misc_bdd.py +++ b/tests/end2end/features/test_misc_bdd.py @@ -34,4 +34,4 @@ def pdf_exists(quteproc, tmpdir, filename): def set_up_blocking(quteproc, lists, server): url = 'http://localhost:{}/data/adblock/'.format(server.port) urls = [url + item.strip() for item in lists.split(',')] - quteproc.set_setting('content.host_blocking.lists', json.dumps(urls)) + quteproc.set_setting('content.blocking.hosts.lists', json.dumps(urls)) diff --git a/tests/helpers/utils.py b/tests/helpers/utils.py index 2c275bf15..5f7297719 100644 --- a/tests/helpers/utils.py +++ b/tests/helpers/utils.py @@ -233,9 +233,25 @@ def ignore_bs4_warning(): yield +def _decompress_gzip_datafile(filename): + path = os.path.join(abs_datapath(), filename) + yield from io.TextIOWrapper(gzip.open(path), encoding="utf-8") + + def blocked_hosts(): - path = os.path.join(abs_datapath(), 'blocked-hosts.gz') - yield from io.TextIOWrapper(gzip.open(path), encoding='utf-8') + return _decompress_gzip_datafile("blocked-hosts.gz") + + +def adblock_dataset_tsv(): + return _decompress_gzip_datafile("brave-adblock/ublock-matches.tsv.gz") + + +def easylist_txt(): + return _decompress_gzip_datafile("easylist.txt.gz") + + +def easyprivacy_txt(): + return _decompress_gzip_datafile("easyprivacy.txt.gz") def seccomp_args(qt_flag): diff --git a/tests/unit/components/test_adblock.py b/tests/unit/components/test_adblock.py index 6ee236765..902f6e337 100644 --- a/tests/unit/components/test_adblock.py +++ b/tests/unit/components/test_adblock.py @@ -32,41 +32,44 @@ from qutebrowser.utils import urlmatch from helpers import utils -pytestmark = pytest.mark.usefixtures('qapp') +pytestmark = pytest.mark.usefixtures("qapp") # TODO See ../utils/test_standarddirutils for OSError and caplog assertion -WHITELISTED_HOSTS = ('qutebrowser.org', 'mediumhost.io', 
'http://*.edu') +WHITELISTED_HOSTS = ("qutebrowser.org", "mediumhost.io", "http://*.edu") -BLOCKLIST_HOSTS = ('localhost', - 'mediumhost.io', - 'malware.badhost.org', - '4-verybadhost.com', - 'ads.worsthostever.net') +BLOCKLIST_HOSTS = ( + "localhost", + "mediumhost.io", + "malware.badhost.org", + "4-verybadhost.com", + "ads.worsthostever.net", +) -CLEAN_HOSTS = ('goodhost.gov', 'verygoodhost.com') +CLEAN_HOSTS = ("goodhost.gov", "verygoodhost.com") -URLS_TO_CHECK = ('http://localhost', - 'http://mediumhost.io', - 'ftp://malware.badhost.org', - 'http://4-verybadhost.com', - 'http://ads.worsthostever.net', - 'http://goodhost.gov', - 'ftp://verygoodhost.com', - 'http://qutebrowser.org', - 'http://veryverygoodhost.edu') +URLS_TO_CHECK = ( + "http://localhost", + "http://mediumhost.io", + "ftp://malware.badhost.org", + "http://4-verybadhost.com", + "http://ads.worsthostever.net", + "http://goodhost.gov", + "ftp://verygoodhost.com", + "http://qutebrowser.org", + "http://veryverygoodhost.edu", +) @pytest.fixture -def host_blocker_factory(config_tmpdir, data_tmpdir, download_stub, - config_stub): +def host_blocker_factory(config_tmpdir, data_tmpdir, download_stub, config_stub): def factory(): - return adblock.HostBlocker(config_dir=config_tmpdir, - data_dir=data_tmpdir) + return adblock.HostBlocker(config_dir=config_tmpdir, data_dir=data_tmpdir) + return factory -def create_zipfile(directory, files, zipname='test'): +def create_zipfile(directory, files, zipname="test"): """Return a path to a newly created zip file. Args: @@ -74,17 +77,19 @@ def create_zipfile(directory, files, zipname='test'): files: list of filenames (relative to directory) to each file to add. zipname: name to give to the zip file. 
""" - zipfile_path = directory / zipname + '.zip' - with zipfile.ZipFile(str(zipfile_path), 'w') as new_zipfile: + zipfile_path = directory / zipname + ".zip" + with zipfile.ZipFile(str(zipfile_path), "w") as new_zipfile: for file_path in files: - new_zipfile.write(str(directory / file_path), - arcname=os.path.basename(str(file_path))) + new_zipfile.write( + str(directory / file_path), arcname=os.path.basename(str(file_path)) + ) # Removes path from file name - return str(zipname + '.zip') + return str(zipname + ".zip") -def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, - name='hosts', line_format='one_per_line'): +def create_blocklist( + directory, blocked_hosts=BLOCKLIST_HOSTS, name="hosts", line_format="one_per_line" +): """Return a path to a blocklist file. Args: @@ -96,25 +101,29 @@ def create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS, 'not_correct' --> Not a correct hosts file format. """ blocklist_file = directory / name - with blocklist_file.open('w', encoding='UTF-8') as blocklist: + with blocklist_file.open("w", encoding="UTF-8") as blocklist: # ensure comments are ignored when processing blocklist - blocklist.write('# Blocked Hosts List #\n\n') - if line_format == 'etc_hosts': # /etc/hosts like format + blocklist.write("# Blocked Hosts List #\n\n") + if line_format == "etc_hosts": # /etc/hosts like format for host in blocked_hosts: - blocklist.write('127.0.0.1 ' + host + '\n') - elif line_format == 'one_per_line': + blocklist.write("127.0.0.1 " + host + "\n") + elif line_format == "one_per_line": for host in blocked_hosts: - blocklist.write(host + '\n') - elif line_format == 'not_correct': + blocklist.write(host + "\n") + elif line_format == "not_correct": for host in blocked_hosts: - blocklist.write(host + ' This is not a correct hosts file\n') + blocklist.write(host + " This is not a correct hosts file\n") else: - raise ValueError('Incorrect line_format argument') + raise ValueError("Incorrect line_format argument") return name 
-def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS, - whitelisted=WHITELISTED_HOSTS, urls_to_check=URLS_TO_CHECK): +def assert_urls( + host_blocker, + blocked=BLOCKLIST_HOSTS, + whitelisted=WHITELISTED_HOSTS, + urls_to_check=URLS_TO_CHECK, +): """Test if Urls to check are blocked or not by HostBlocker. Ensure URLs in 'blocked' and not in 'whitelisted' are blocked. @@ -122,7 +131,7 @@ def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS, localhost is an example of a special case that shouldn't be blocked. """ - whitelisted = list(whitelisted) + ['localhost'] + whitelisted = list(whitelisted) + ["localhost"] for str_url in urls_to_check: url = QUrl(str_url) host = url.host() @@ -135,8 +144,8 @@ def assert_urls(host_blocker, blocked=BLOCKLIST_HOSTS, def blocklist_to_url(filename): """Get an example.com-URL with the given filename as path.""" assert not os.path.isabs(filename), filename - url = QUrl('http://example.com/') - url.setPath('/' + filename) + url = QUrl("http://example.com/") + url.setPath("/" + filename) assert url.isValid(), url.errorString() return url @@ -153,59 +162,107 @@ def generic_blocklists(directory): - a remote text file without valid hosts format. 
""" # remote zip file with 1 hosts file and 2 useless files - file1 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS, - name='README', line_format='not_correct') - file2 = create_blocklist(directory, blocked_hosts=BLOCKLIST_HOSTS[:3], - name='hosts', line_format='etc_hosts') - file3 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS, - name='false_positive', line_format='one_per_line') + file1 = create_blocklist( + directory, blocked_hosts=CLEAN_HOSTS, name="README", line_format="not_correct" + ) + file2 = create_blocklist( + directory, + blocked_hosts=BLOCKLIST_HOSTS[:3], + name="hosts", + line_format="etc_hosts", + ) + file3 = create_blocklist( + directory, + blocked_hosts=CLEAN_HOSTS, + name="false_positive", + line_format="one_per_line", + ) files_to_zip = [file1, file2, file3] - blocklist1 = blocklist_to_url( - create_zipfile(directory, files_to_zip, 'block1')) + blocklist1 = blocklist_to_url(create_zipfile(directory, files_to_zip, "block1")) # remote zip file without file named hosts # (Should raise a FileNotFoundError) - file1 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS, - name='md5sum', line_format='etc_hosts') - file2 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS, - name='README', line_format='not_correct') - file3 = create_blocklist(directory, blocked_hosts=CLEAN_HOSTS, - name='false_positive', line_format='one_per_line') + file1 = create_blocklist( + directory, blocked_hosts=CLEAN_HOSTS, name="md5sum", line_format="etc_hosts" + ) + file2 = create_blocklist( + directory, blocked_hosts=CLEAN_HOSTS, name="README", line_format="not_correct" + ) + file3 = create_blocklist( + directory, + blocked_hosts=CLEAN_HOSTS, + name="false_positive", + line_format="one_per_line", + ) files_to_zip = [file1, file2, file3] - blocklist2 = blocklist_to_url( - create_zipfile(directory, files_to_zip, 'block2')) + blocklist2 = blocklist_to_url(create_zipfile(directory, files_to_zip, "block2")) # remote zip file with only one valid hosts file 
inside - file1 = create_blocklist(directory, blocked_hosts=[BLOCKLIST_HOSTS[3]], - name='malwarelist', line_format='etc_hosts') - blocklist3 = blocklist_to_url(create_zipfile(directory, [file1], 'block3')) + file1 = create_blocklist( + directory, + blocked_hosts=[BLOCKLIST_HOSTS[3]], + name="malwarelist", + line_format="etc_hosts", + ) + blocklist3 = blocklist_to_url(create_zipfile(directory, [file1], "block3")) # local text file with valid hosts - blocklist4 = QUrl.fromLocalFile(str(directory / create_blocklist( - directory, blocked_hosts=[BLOCKLIST_HOSTS[4]], - name='mycustomblocklist', line_format='one_per_line'))) + blocklist4 = QUrl.fromLocalFile( + str( + directory + / create_blocklist( + directory, + blocked_hosts=[BLOCKLIST_HOSTS[4]], + name="mycustomblocklist", + line_format="one_per_line", + ) + ) + ) assert blocklist4.isValid(), blocklist4.errorString() # remote text file without valid hosts format - blocklist5 = blocklist_to_url(create_blocklist( - directory, blocked_hosts=CLEAN_HOSTS, name='notcorrectlist', - line_format='not_correct')) - - return [blocklist1.toString(), blocklist2.toString(), - blocklist3.toString(), blocklist4.toString(), - blocklist5.toString()] - - -def test_disabled_blocking_update(config_stub, tmpdir, caplog, - host_blocker_factory): - """Ensure no URL is blocked when host blocking is disabled.""" - config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir) - config_stub.val.content.host_blocking.enabled = False + blocklist5 = blocklist_to_url( + create_blocklist( + directory, + blocked_hosts=CLEAN_HOSTS, + name="notcorrectlist", + line_format="not_correct", + ) + ) + + return [ + blocklist1.toString(), + blocklist2.toString(), + blocklist3.toString(), + blocklist4.toString(), + blocklist5.toString(), + ] + + +@pytest.mark.parametrize( + "blocking_enabled, method", + [ + # Assuming the adblock dependency is installed + (True, "auto"), + (True, "adblock"), + (False, "auto"), + (False, "adblock"), + (False, "both"), + 
(False, "hosts"), + ], +) +def test_disabled_blocking_update( + config_stub, tmpdir, caplog, host_blocker_factory, blocking_enabled, method +): + """Ensure no URL is blocked when host blocking should be disabled.""" + config_stub.val.content.blocking.hosts.lists = generic_blocklists(tmpdir) + config_stub.val.content.blocking.enabled = blocking_enabled + config_stub.val.content.blocking.method = method host_blocker = host_blocker_factory() - host_blocker.adblock_update() - while host_blocker._in_progress: - current_download = host_blocker._in_progress[0] + downloads = host_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] with caplog.at_level(logging.ERROR): current_download.successful = True current_download.finished.emit() @@ -215,14 +272,14 @@ def test_disabled_blocking_update(config_stub, tmpdir, caplog, def test_disabled_blocking_per_url(config_stub, host_blocker_factory): - example_com = 'https://www.example.com/' + example_com = "https://www.example.com/" - config_stub.val.content.host_blocking.lists = [] + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.hosts.lists = [] pattern = urlmatch.UrlPattern(example_com) - config_stub.set_obj('content.host_blocking.enabled', False, - pattern=pattern) + config_stub.set_obj("content.blocking.enabled", False, pattern=pattern) - url = QUrl('blocked.example.com') + url = QUrl("blocked.example.com") host_blocker = host_blocker_factory() host_blocker._blocked_hosts.add(url.host()) @@ -233,8 +290,9 @@ def test_disabled_blocking_per_url(config_stub, host_blocker_factory): def test_no_blocklist_update(config_stub, download_stub, host_blocker_factory): """Ensure no URL is blocked when no block list exists.""" - config_stub.val.content.host_blocking.lists = None - config_stub.val.content.host_blocking.enabled = True + config_stub.val.content.blocking.hosts.lists = None + config_stub.val.content.blocking.method = "hosts" + 
config_stub.val.content.blocking.enabled = True host_blocker = host_blocker_factory() host_blocker.adblock_update() @@ -247,15 +305,16 @@ def test_no_blocklist_update(config_stub, download_stub, host_blocker_factory): def test_successful_update(config_stub, tmpdir, caplog, host_blocker_factory): """Ensure hosts from host_blocking.lists are blocked after an update.""" - config_stub.val.content.host_blocking.lists = generic_blocklists(tmpdir) - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = None + config_stub.val.content.blocking.hosts.lists = generic_blocklists(tmpdir) + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.whitelist = None host_blocker = host_blocker_factory() - host_blocker.adblock_update() + downloads = host_blocker.adblock_update() # Simulate download is finished - while host_blocker._in_progress: - current_download = host_blocker._in_progress[0] + while downloads._in_progress: + current_download = downloads._in_progress[0] with caplog.at_level(logging.ERROR): current_download.successful = True current_download.finished.emit() @@ -263,37 +322,43 @@ def test_successful_update(config_stub, tmpdir, caplog, host_blocker_factory): assert_urls(host_blocker, whitelisted=[]) -def test_parsing_multiple_hosts_on_line(host_blocker_factory): +def test_parsing_multiple_hosts_on_line(config_stub, host_blocker_factory): """Ensure multiple hosts on a line get parsed correctly.""" + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.enabled = True + host_blocker = host_blocker_factory() - bytes_host_line = ' '.join(BLOCKLIST_HOSTS).encode('utf-8') + bytes_host_line = " ".join(BLOCKLIST_HOSTS).encode("utf-8") parsed_hosts = host_blocker._read_hosts_line(bytes_host_line) host_blocker._blocked_hosts |= parsed_hosts assert_urls(host_blocker, whitelisted=[]) -@pytest.mark.parametrize('ip, host', [ 
- ('127.0.0.1', 'localhost'), - ('27.0.0.1', 'localhost.localdomain'), - ('27.0.0.1', 'local'), - ('55.255.255.255', 'broadcasthost'), - (':1', 'localhost'), - (':1', 'ip6-localhost'), - (':1', 'ip6-loopback'), - ('e80::1%lo0', 'localhost'), - ('f00::0', 'ip6-localnet'), - ('f00::0', 'ip6-mcastprefix'), - ('f02::1', 'ip6-allnodes'), - ('f02::2', 'ip6-allrouters'), - ('ff02::3', 'ip6-allhosts'), - ('.0.0.0', '0.0.0.0'), - ('127.0.1.1', 'myhostname'), - ('127.0.0.53', 'myhostname'), -]) +@pytest.mark.parametrize( + "ip, host", + [ + ("127.0.0.1", "localhost"), + ("27.0.0.1", "localhost.localdomain"), + ("27.0.0.1", "local"), + ("55.255.255.255", "broadcasthost"), + (":1", "localhost"), + (":1", "ip6-localhost"), + (":1", "ip6-loopback"), + ("e80::1%lo0", "localhost"), + ("f00::0", "ip6-localnet"), + ("f00::0", "ip6-mcastprefix"), + ("f02::1", "ip6-allnodes"), + ("f02::2", "ip6-allrouters"), + ("ff02::3", "ip6-allhosts"), + (".0.0.0", "0.0.0.0"), + ("127.0.1.1", "myhostname"), + ("127.0.0.53", "myhostname"), + ], +) def test_whitelisted_lines(host_blocker_factory, ip, host): """Make sure we don't block hosts we don't want to.""" host_blocker = host_blocker_factory() - line = ('{} {}'.format(ip, host)).encode('ascii') + line = ("{} {}".format(ip, host)).encode("ascii") parsed_hosts = host_blocker._read_hosts_line(line) assert host not in parsed_hosts @@ -303,19 +368,24 @@ def test_failed_dl_update(config_stub, tmpdir, caplog, host_blocker_factory): Ensure hosts from this list are not blocked. 
""" - dl_fail_blocklist = blocklist_to_url(create_blocklist( - tmpdir, blocked_hosts=CLEAN_HOSTS, name='download_will_fail', - line_format='one_per_line')) - hosts_to_block = (generic_blocklists(tmpdir) + - [dl_fail_blocklist.toString()]) - config_stub.val.content.host_blocking.lists = hosts_to_block - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = None + dl_fail_blocklist = blocklist_to_url( + create_blocklist( + tmpdir, + blocked_hosts=CLEAN_HOSTS, + name="download_will_fail", + line_format="one_per_line", + ) + ) + hosts_to_block = generic_blocklists(tmpdir) + [dl_fail_blocklist.toString()] + config_stub.val.content.blocking.hosts.lists = hosts_to_block + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.whitelist = None host_blocker = host_blocker_factory() - host_blocker.adblock_update() - while host_blocker._in_progress: - current_download = host_blocker._in_progress[0] + downloads = host_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] # if current download is the file we want to fail, make it fail if current_download.name == dl_fail_blocklist.path(): current_download.successful = False @@ -327,37 +397,36 @@ def test_failed_dl_update(config_stub, tmpdir, caplog, host_blocker_factory): assert_urls(host_blocker, whitelisted=[]) -@pytest.mark.parametrize('location', ['content', 'comment']) -def test_invalid_utf8(config_stub, tmpdir, caplog, host_blocker_factory, - location): +@pytest.mark.parametrize("location", ["content", "comment"]) +def test_invalid_utf8(config_stub, tmpdir, caplog, host_blocker_factory, location): """Make sure invalid UTF-8 is handled correctly. 
See https://github.com/qutebrowser/qutebrowser/issues/2301 """ - blocklist = tmpdir / 'blocklist' - if location == 'comment': - blocklist.write_binary(b'# nbsp: \xa0\n') + blocklist = tmpdir / "blocklist" + if location == "comment": + blocklist.write_binary(b"# nbsp: \xa0\n") else: - assert location == 'content' - blocklist.write_binary(b'https://www.example.org/\xa0') + assert location == "content" + blocklist.write_binary(b"https://www.example.org/\xa0") for url in BLOCKLIST_HOSTS: - blocklist.write(url + '\n', mode='a') + blocklist.write(url + "\n", mode="a") - url = blocklist_to_url('blocklist') - config_stub.val.content.host_blocking.lists = [url.toString()] - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = None + url = blocklist_to_url("blocklist") + config_stub.val.content.blocking.hosts.lists = [url.toString()] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.whitelist = None host_blocker = host_blocker_factory() - host_blocker.adblock_update() - current_download = host_blocker._in_progress[0] + downloads = host_blocker.adblock_update() + current_download = downloads._in_progress[0] - if location == 'content': + if location == "content": with caplog.at_level(logging.ERROR): current_download.successful = True current_download.finished.emit() - expected = (r"Failed to decode: " - r"b'https://www.example.org/\xa0localhost") + expected = r"Failed to decode: " r"b'https://www.example.org/\xa0localhost" assert caplog.messages[-2].startswith(expected) else: current_download.successful = True @@ -367,18 +436,17 @@ def test_invalid_utf8(config_stub, tmpdir, caplog, host_blocker_factory, assert_urls(host_blocker, whitelisted=[]) -def test_invalid_utf8_compiled(config_stub, config_tmpdir, data_tmpdir, - monkeypatch, caplog, host_blocker_factory): +def test_invalid_utf8_compiled( + config_stub, config_tmpdir, data_tmpdir, 
monkeypatch, caplog, host_blocker_factory +): """Make sure invalid UTF-8 in the compiled file is handled.""" - config_stub.val.content.host_blocking.lists = [] + config_stub.val.content.blocking.hosts.lists = [] # Make sure the HostBlocker doesn't delete blocked-hosts in __init__ - monkeypatch.setattr(adblock.HostBlocker, 'update_files', - lambda _self: None) + monkeypatch.setattr(adblock.HostBlocker, "update_files", lambda _self: None) - (config_tmpdir / 'blocked-hosts').write_binary( - b'https://www.example.org/\xa0') - (data_tmpdir / 'blocked-hosts').ensure() + (config_tmpdir / "blocked-hosts").write_binary(b"https://www.example.org/\xa0") + (data_tmpdir / "blocked-hosts").ensure() host_blocker = host_blocker_factory() with caplog.at_level(logging.ERROR): @@ -387,18 +455,21 @@ def test_invalid_utf8_compiled(config_stub, config_tmpdir, data_tmpdir, def test_blocking_with_whitelist(config_stub, data_tmpdir, host_blocker_factory): - """Ensure hosts in content.host_blocking.whitelist are never blocked.""" + """Ensure hosts in content.blocking.whitelist are never blocked.""" # Simulate adblock_update has already been run # by creating a file named blocked-hosts, # Exclude localhost from it as localhost is never blocked via list filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] - blocklist = create_blocklist(data_tmpdir, - blocked_hosts=filtered_blocked_hosts, - name='blocked-hosts', - line_format='one_per_line') - config_stub.val.content.host_blocking.lists = [blocklist] - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = list(WHITELISTED_HOSTS) + blocklist = create_blocklist( + data_tmpdir, + blocked_hosts=filtered_blocked_hosts, + name="blocked-hosts", + line_format="one_per_line", + ) + config_stub.val.content.blocking.hosts.lists = [blocklist] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.whitelist = list(WHITELISTED_HOSTS) 
host_blocker = host_blocker_factory() host_blocker.read_hosts() @@ -413,11 +484,16 @@ def test_config_change_initial(config_stub, tmpdir, host_blocker_factory): - User quits qutebrowser, empties host_blocking.lists from his config - User restarts qutebrowser, does adblock-update """ - create_blocklist(tmpdir, blocked_hosts=BLOCKLIST_HOSTS, - name='blocked-hosts', line_format='one_per_line') - config_stub.val.content.host_blocking.lists = None - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = None + create_blocklist( + tmpdir, + blocked_hosts=BLOCKLIST_HOSTS, + name="blocked-hosts", + line_format="one_per_line", + ) + config_stub.val.content.blocking.hosts.lists = None + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.whitelist = None host_blocker = host_blocker_factory() host_blocker.read_hosts() @@ -428,16 +504,22 @@ def test_config_change_initial(config_stub, tmpdir, host_blocker_factory): def test_config_change(config_stub, tmpdir, host_blocker_factory): """Ensure blocked-hosts resets if host-block-list is changed to None.""" filtered_blocked_hosts = BLOCKLIST_HOSTS[1:] # Exclude localhost - blocklist = blocklist_to_url(create_blocklist( - tmpdir, blocked_hosts=filtered_blocked_hosts, name='blocked-hosts', - line_format='one_per_line')) - config_stub.val.content.host_blocking.lists = [blocklist.toString()] - config_stub.val.content.host_blocking.enabled = True - config_stub.val.content.host_blocking.whitelist = None + blocklist = blocklist_to_url( + create_blocklist( + tmpdir, + blocked_hosts=filtered_blocked_hosts, + name="blocked-hosts", + line_format="one_per_line", + ) + ) + config_stub.val.content.blocking.hosts.lists = [blocklist.toString()] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" + config_stub.val.content.blocking.whitelist = None host_blocker = 
host_blocker_factory() host_blocker.read_hosts() - config_stub.val.content.host_blocking.lists = None + config_stub.val.content.blocking.hosts.lists = None host_blocker.read_hosts() for str_url in URLS_TO_CHECK: assert not host_blocker._is_blocked(QUrl(str_url)) @@ -447,26 +529,34 @@ def test_add_directory(config_stub, tmpdir, host_blocker_factory): """Ensure adblocker can import all files in a directory.""" blocklist_hosts2 = [] for i in BLOCKLIST_HOSTS[1:]: - blocklist_hosts2.append('1' + i) - - create_blocklist(tmpdir, blocked_hosts=BLOCKLIST_HOSTS, - name='blocked-hosts', line_format='one_per_line') - create_blocklist(tmpdir, blocked_hosts=blocklist_hosts2, - name='blocked-hosts2', line_format='one_per_line') - - config_stub.val.content.host_blocking.lists = [tmpdir.strpath] - config_stub.val.content.host_blocking.enabled = True + blocklist_hosts2.append("1" + i) + + create_blocklist( + tmpdir, + blocked_hosts=BLOCKLIST_HOSTS, + name="blocked-hosts", + line_format="one_per_line", + ) + create_blocklist( + tmpdir, + blocked_hosts=blocklist_hosts2, + name="blocked-hosts2", + line_format="one_per_line", + ) + + config_stub.val.content.blocking.hosts.lists = [tmpdir.strpath] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.method = "hosts" host_blocker = host_blocker_factory() host_blocker.adblock_update() assert len(host_blocker._blocked_hosts) == len(blocklist_hosts2) * 2 def test_adblock_benchmark(data_tmpdir, benchmark, host_blocker_factory): - blocked_hosts = data_tmpdir / 'blocked-hosts' - blocked_hosts.write_text('\n'.join(utils.blocked_hosts()), - encoding='utf-8') + blocked_hosts = data_tmpdir / "blocked-hosts" + blocked_hosts.write_text("\n".join(utils.blocked_hosts()), encoding="utf-8") - url = QUrl('https://www.example.org/') + url = QUrl("https://www.example.org/") blocker = host_blocker_factory() blocker.read_hosts() assert blocker._blocked_hosts diff --git a/tests/unit/components/test_blockutils.py 
b/tests/unit/components/test_blockutils.py new file mode 100644 index 000000000..480a6f9eb --- /dev/null +++ b/tests/unit/components/test_blockutils.py @@ -0,0 +1,83 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: +#!/usr/bin/env python3 + +# Copyright 2020 Árni Dagur <arni@dagur.eu> +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +import os +import io +from typing import IO + +from PyQt5.QtCore import QUrl + +import pytest + +from qutebrowser.components.utils import blockutils + + +@pytest.fixture +def pretend_blocklists(tmpdir): + """Put fake blocklists into a tempdir. + + Put fake blocklists blocklists into a temporary directory, then return + both a list containing `file://` urls, and the residing dir. 
+ """ + data = [ + (["cdn.malwarecorp.is", "evil-industries.com"], "malicious-hosts.txt"), + (["news.moms-against-icecream.net"], "blocklist.list"), + ] + # Add a bunch of automatically generated blocklist as well + for n in range(8): + data.append(([f"example{n}.com", f"example{n+1}.net"], f"blocklist{n}")) + + bl_dst_dir = tmpdir / "blocklists" + bl_dst_dir.mkdir() + urls = [] + for blocklist_lines, filename in data: + bl_dst_path = bl_dst_dir / filename + with open(bl_dst_path, "w", encoding="utf-8") as f: + f.write("\n".join(blocklist_lines)) + assert os.path.isfile(bl_dst_path) + urls.append(QUrl.fromLocalFile(str(bl_dst_path)).toString()) + return urls, bl_dst_dir + + +def test_blocklist_dl(qtbot, pretend_blocklists): + total_expected = 10 + num_single_dl_called = 0 + + def on_single_download(download: IO[bytes]) -> None: + nonlocal num_single_dl_called + num_single_dl_called += 1 + + num_lines = 0 + with io.TextIOWrapper(download, encoding="utf-8") as dl_io: + for line in dl_io: + assert line.split(".")[-1].strip() in ("com", "net", "is") + num_lines += 1 + assert num_lines >= 1 + + list_qurls = [QUrl(blocklist) for blocklist in pretend_blocklists[0]] + + dl = blockutils.BlocklistDownloads(list_qurls) + dl.single_download_finished.connect(on_single_download) + + with qtbot.waitSignal(dl.all_downloads_finished) as blocker: + dl.initiate() + assert blocker.args == [total_expected] + + assert num_single_dl_called == total_expected diff --git a/tests/unit/components/test_braveadblock.py b/tests/unit/components/test_braveadblock.py new file mode 100644 index 000000000..8b953c439 --- /dev/null +++ b/tests/unit/components/test_braveadblock.py @@ -0,0 +1,368 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2020 Florian Bruhin (The Compiler) <mail@qutebrowser.org> +# +# This file is part of qutebrowser. 
+# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +import pathlib +import logging +import csv +import os.path +from typing import Iterable, Tuple + +from PyQt5.QtCore import QUrl + +import pytest + +from qutebrowser.api.interceptor import ResourceType +from qutebrowser.components import braveadblock +from qutebrowser.components.utils import blockutils +from helpers import utils + +pytestmark = pytest.mark.usefixtures("qapp") + +OKAY_URLS = [ + ( + "https://qutebrowser.org/icons/qutebrowser.svg", + "https://qutebrowser.org", + ResourceType.image, + ), + ( + "https://qutebrowser.org/doc/img/main.png", + "https://qutebrowser.org", + ResourceType.image, + ), + ( + "https://qutebrowser.org/media/font.css", + "https://qutebrowser.org", + ResourceType.stylesheet, + ), + ( + "https://www.ruv.is/sites/default/files/styles/2000x1125/public/fr_20180719_091367_1.jpg?itok=0zTNSKKS&timestamp=1561275315", + "https://www.ruv.is/frett/2020/04/23/today-is-the-first-day-of-summer", + ResourceType.image, + ), + ("https://easylist.to/easylist/easylist.txt", None, ResourceType.main_frame), + ("https://easylist.to/easylist/easyprivacy.txt", None, ResourceType.main_frame), +] + +NOT_OKAY_URLS = [ + ( + 
"https://pagead2.googlesyndication.com/pcs/activeview?xai=AKAOjsvBN5MuZsVQyE7HD18bD-JjK589TD3zkugwCoLE2C5nP26WFNCQb8WwxzZTelPEHwwnhaOCsGxYc8WeFgYZLReqLYl8r9BtAQ6r83OHa04&sig=Cg0ArKJSzKMgXuVbXAD1EAE&adk=1473563476&tt=-1&bs=1431%2C473&mtos=120250,120250,120250,120250,120250&tos=120250,0,0,0,0&p=60,352,150,1080&mcvt=120250&rs=0&ht=0&tfs=5491&tls=125682&mc=1&lte=0&bas=0&bac=0&if=1&met=ie&avms=nio&exg=1&md=2&btr=0&lm=2&rst=1587887205533&dlt=226&rpt=1849&isd=0&msd=0&ext&xdi=0&ps=1431%2C7860&ss=1440%2C810&pt=-1&bin=4&deb=1-0-0-1192-5-1191-1191-0-0-0&tvt=125678&is=728%2C90&iframe_loc=https%3A%2F%2Ftpc.googlesyndication.com%2Fsafeframe%2F1-0-37%2Fhtml%2Fcontainer.html&r=u&id=osdtos&vs=4&uc=1192&upc=1&tgt=DIV&cl=1&cec=1&wf=0&cac=1&cd=0x0&itpl=19&v=20200422", + "https://google.com", + ResourceType.image, + ), + ( + "https://e.deployads.com/e/myanimelist.net", + "https://myanimelist.net", + ResourceType.xhr, + ), + ( + "https://c.amazon-adsystem.com/aax2/apstag.js", + "https://www.reddit.com", + ResourceType.script, + ), + ( + "https://c.aaxads.com/aax.js?pub=AAX763KC6&hst=www.reddit.com&ver=1.2", + "https://www.reddit.com", + ResourceType.script, + ), + ( + "https://pixel.mathtag.com/sync/img/?mt_exid=10009&mt_exuid=&mm_bnc&mm_bct&UUID=c7b65ea6-76cc-4700-b0c7-6dbcd10820ed", + "https://damndelicious.net/2019/04/03/easy-slow-cooker-chili/", + ResourceType.image, + ), +] + + +def run_function_on_dataset(given_function): + """Run the given function on a bunch of urls. + + In the data folder, we have a file called `adblock_dataset.tsv`, which + contains tuples of (url, source_url, type) in each line. We give these + values to the given function, row by row. 
+ """ + dataset = utils.adblock_dataset_tsv() + reader = csv.DictReader(dataset, delimiter="\t") + for row in reader: + url = QUrl(row["url"]) + source_url = QUrl(row["source_url"]) + resource_type = ResourceType[row["type"]] + given_function(url, source_url, resource_type) + + +def assert_none_blocked(ad_blocker): + assert_urls(ad_blocker, NOT_OKAY_URLS + OKAY_URLS, False) + + def assert_not_blocked(url, source_url, resource_type): + nonlocal ad_blocker + assert not ad_blocker._is_blocked(url, source_url, resource_type) + + run_function_on_dataset(assert_not_blocked) + + +@pytest.fixture +def blocklist_invalid_utf8(tmpdir): + dest_path = tmpdir / "invalid_utf8.txt" + dest_path.write_binary(b"invalidutf8\xa0") + return QUrl.fromLocalFile(str(dest_path)).toString() + + +@pytest.fixture +def easylist_easyprivacy_both(tmpdir): + """Put easyprivacy and easylist blocklists into a tempdir. + + Copy the easyprivacy and easylist blocklists into a temporary directory, + then return both a list containing `file://` urls, and the residing dir. 
+ """ + bl_dst_dir = tmpdir / "blocklists" + bl_dst_dir.mkdir() + urls = [] + for blocklist, filename in [ + (utils.easylist_txt(), "easylist.txt"), + (utils.easyprivacy_txt(), "easyprivacy.txt"), + ]: + bl_dst_path = bl_dst_dir / filename + with open(bl_dst_path, "w", encoding="utf-8") as f: + f.write("\n".join(list(blocklist))) + assert os.path.isfile(bl_dst_path) + urls.append(QUrl.fromLocalFile(str(bl_dst_path)).toString()) + return urls, bl_dst_dir + + +@pytest.fixture +def empty_dir(tmpdir): + empty_dir_path = tmpdir / "empty_dir" + empty_dir_path.mkdir() + return empty_dir_path + + +@pytest.fixture +def easylist_easyprivacy(easylist_easyprivacy_both): + """The first return value of `easylist_easyprivacy_both`.""" + return easylist_easyprivacy_both[0] + + +@pytest.fixture +def ad_blocker(config_stub, data_tmpdir): + pytest.importorskip("adblock") + return braveadblock.BraveAdBlocker(data_dir=pathlib.Path(str(data_tmpdir))) + + +def assert_only_one_success_message(messages): + expected_msg = "braveadblock: Filters successfully read" + assert len([m for m in messages if m.startswith(expected_msg)]) == 1 + + +def assert_urls( + ad_blocker: braveadblock.BraveAdBlocker, + urls: Iterable[Tuple[str, str, ResourceType]], + should_be_blocked: bool, +) -> None: + for (str_url, source_str_url, request_type) in urls: + url = QUrl(str_url) + source_url = QUrl(source_str_url) + is_blocked = ad_blocker._is_blocked(url, source_url, request_type) + assert is_blocked == should_be_blocked + + +@pytest.mark.parametrize( + "blocking_enabled, method, should_be_blocked", + [ + (True, "auto", True), + (True, "adblock", True), + (True, "both", True), + (True, "hosts", False), + (False, "auto", False), + (False, "adblock", False), + (False, "both", False), + (False, "hosts", False), + ], +) +def test_blocking_enabled( + config_stub, + easylist_easyprivacy, + caplog, + ad_blocker, + blocking_enabled, + method, + should_be_blocked, +): + """Tests that the ads are blocked when the 
adblocker is enabled, and vice versa.""" + config_stub.val.content.blocking.adblock.lists = easylist_easyprivacy + config_stub.val.content.blocking.enabled = blocking_enabled + config_stub.val.content.blocking.method = method + # Simulate the method-changed hook being run, since it doesn't execute + # with pytest. + ad_blocker.enabled = braveadblock._should_be_used() + + downloads = ad_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] + with caplog.at_level(logging.ERROR): + current_download.successful = True + current_download.finished.emit() + assert_urls(ad_blocker, NOT_OKAY_URLS, should_be_blocked) + assert_urls(ad_blocker, OKAY_URLS, False) + + +def test_adblock_cache(config_stub, easylist_easyprivacy, caplog, ad_blocker): + config_stub.val.content.blocking.adblock.lists = easylist_easyprivacy + config_stub.val.content.blocking.enabled = True + + for i in range(3): + print("At cache test iteration {}".format(i)) + # Trying to read the cache before calling the update command should return + # a log message. + with caplog.at_level(logging.INFO): + ad_blocker.read_cache() + caplog.messages[-1].startswith( + "Run :brave-adblock-update to get adblock lists." + ) + + if i == 0: + # We haven't initialized the ad blocker yet, so we shouldn't be blocking + # anything. + assert_none_blocked(ad_blocker) + + # Now we initialize the adblocker. + downloads = ad_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] + with caplog.at_level(logging.ERROR): + current_download.successful = True + current_download.finished.emit() + + # After initializing the adblocker, we should start seeing ads + # blocked. + assert_urls(ad_blocker, NOT_OKAY_URLS, True) + assert_urls(ad_blocker, OKAY_URLS, False) + + # After reading the cache, we should still be seeing ads blocked. 
+ ad_blocker.read_cache() + assert_urls(ad_blocker, NOT_OKAY_URLS, True) + assert_urls(ad_blocker, OKAY_URLS, False) + + # Now we remove the cache file and try all over again... + ad_blocker._cache_path.unlink() + + +def test_invalid_utf8(ad_blocker, config_stub, blocklist_invalid_utf8, caplog): + """Test that the adblocker handles invalid utf-8 correctly.""" + config_stub.val.content.blocking.adblock.lists = [blocklist_invalid_utf8] + config_stub.val.content.blocking.enabled = True + + with caplog.at_level(logging.INFO): + ad_blocker.adblock_update() + expected = "braveadblock: Block list is not valid utf-8" + assert caplog.messages[-2].startswith(expected) + + +def test_config_changed(ad_blocker, config_stub, easylist_easyprivacy, caplog): + """Ensure blocked-hosts resets if host-block-list is changed to None.""" + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.whitelist = None + + for _ in range(2): + # We should be blocking like normal, since the block lists are set to + # easylist and easyprivacy. + config_stub.val.content.blocking.adblock.lists = easylist_easyprivacy + downloads = ad_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] + with caplog.at_level(logging.ERROR): + current_download.successful = True + current_download.finished.emit() + assert_urls(ad_blocker, NOT_OKAY_URLS, True) + assert_urls(ad_blocker, OKAY_URLS, False) + + # After setting the ad blocking lists to None, the ads should still be + # blocked, since we haven't run `:brave-adblock-update`. + config_stub.val.content.blocking.adblock.lists = None + assert_urls(ad_blocker, NOT_OKAY_URLS, True) + assert_urls(ad_blocker, OKAY_URLS, False) + + # After updating the adblocker, nothing should be blocked, since we set + # the blocklist to None. 
+ downloads = ad_blocker.adblock_update() + while downloads._in_progress: + current_download = downloads._in_progress[0] + with caplog.at_level(logging.ERROR): + current_download.successful = True + current_download.finished.emit() + assert_none_blocked(ad_blocker) + + +def test_whitelist_on_dataset(config_stub, easylist_easyprivacy): + config_stub.val.content.blocking.adblock.lists = easylist_easyprivacy + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.whitelist = None + + def assert_whitelisted(url, source_url, resource_type): + config_stub.val.content.blocking.whitelist = None + assert not blockutils.is_whitelisted_url(url) + config_stub.val.content.blocking.whitelist = [] + assert not blockutils.is_whitelisted_url(url) + whitelist_url = url.toString(QUrl.RemovePath) + "/*" + config_stub.val.content.blocking.whitelist = [whitelist_url] + assert blockutils.is_whitelisted_url(url) + + run_function_on_dataset(assert_whitelisted) + + +def test_update_easylist_easyprivacy_directory( + ad_blocker, config_stub, easylist_easyprivacy_both, caplog +): + # This directory should contain two text files, one for easylist, another + # for easyprivacy. 
+ lists_directory = easylist_easyprivacy_both[1] + + config_stub.val.content.blocking.adblock.lists = [ + QUrl.fromLocalFile(str(lists_directory)).toString() + ] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.whitelist = None + + with caplog.at_level(logging.INFO): + ad_blocker.adblock_update() + assert_only_one_success_message(caplog.messages) + assert ( + caplog.messages[-1] + == "braveadblock: Filters successfully read from 2 sources" + ) + assert_urls(ad_blocker, NOT_OKAY_URLS, True) + assert_urls(ad_blocker, OKAY_URLS, False) + + +def test_update_empty_directory_blocklist(ad_blocker, config_stub, empty_dir, caplog): + tmpdir_url = QUrl.fromLocalFile(str(empty_dir)).toString() + config_stub.val.content.blocking.adblock.lists = [tmpdir_url] + config_stub.val.content.blocking.enabled = True + config_stub.val.content.blocking.whitelist = None + + # The temporary directory we created should be empty + assert len(empty_dir.listdir()) == 0 + + with caplog.at_level(logging.INFO): + ad_blocker.adblock_update() + assert_only_one_success_message(caplog.messages) + assert ( + caplog.messages[-1] + == "braveadblock: Filters successfully read from 0 sources" + ) + + # There are no filters, so no ads should be blocked. 
+ assert_none_blocked(ad_blocker) diff --git a/tests/unit/config/test_configcommands.py b/tests/unit/config/test_configcommands.py index 220aa40f7..dc62717e5 100644 --- a/tests/unit/config/test_configcommands.py +++ b/tests/unit/config/test_configcommands.py @@ -297,7 +297,7 @@ class TestAdd: @pytest.mark.parametrize('temp', [True, False]) @pytest.mark.parametrize('value', ['test1', 'test2']) def test_list_add(self, commands, config_stub, yaml_value, temp, value): - name = 'content.host_blocking.whitelist' + name = 'content.blocking.whitelist' commands.config_list_add(name, value, temp=temp) @@ -324,7 +324,7 @@ class TestAdd: with pytest.raises( cmdutils.CommandError, match="Invalid value '{}'".format(value)): - commands.config_list_add('content.host_blocking.whitelist', value) + commands.config_list_add('content.blocking.whitelist', value) @pytest.mark.parametrize('value', ['test1', 'test2']) @pytest.mark.parametrize('temp', [True, False]) diff --git a/tests/unit/utils/test_version.py b/tests/unit/utils/test_version.py index 922692fdd..e61505993 100644 --- a/tests/unit/utils/test_version.py +++ b/tests/unit/utils/test_version.py @@ -544,7 +544,7 @@ class ImportFake: Attributes: modules: A dict mapping module names to bools. If True, the import will - success. Otherwise, it'll fail with ImportError. + succeed. Otherwise, it'll fail with ImportError. version_attribute: The name to use in the fake modules for the version attribute. version: The version to use for the modules. 
@@ -560,6 +560,7 @@ class ImportFake: ('jinja2', True), ('pygments', True), ('yaml', True), + ('adblock', True), ('attr', True), ('importlib_resources', True), ('PyQt5.QtWebEngineWidgets', True), @@ -623,7 +624,7 @@ def import_fake(monkeypatch): class TestModuleVersions: - """Tests for _module_versions().""" + """Tests for _module_versions() and ModuleInfo.""" def test_all_present(self, import_fake): """Test with all modules present in version 1.2.3.""" @@ -637,6 +638,7 @@ class TestModuleVersions: @pytest.mark.parametrize('module, idx, expected', [ ('colorama', 1, 'colorama: no'), + ('adblock', 6, 'adblock: no'), ]) def test_missing_module(self, module, idx, expected, import_fake): """Test with a module missing. @@ -647,8 +649,39 @@ class TestModuleVersions: expected: The expected text. """ import_fake.modules[module] = False + # Needed after mocking the module + mod_info = version.MODULE_INFO[module] + mod_info._reset_cache() + assert version._module_versions()[idx] == expected + for method_name, expected_result in [ + ("is_installed", False), + ("get_version", None), + ("is_outdated", None) + ]: + method = getattr(mod_info, method_name) + # With hot cache + mod_info._initialize_info() + assert method() == expected_result + # With cold cache + mod_info._reset_cache() + assert method() == expected_result + + def test_outdated_adblock(self, import_fake): + """Test that warning is shown when adblock module is outdated.""" + min_version = version.MODULE_INFO["adblock"].min_version + fake_version = "0.1.0" + + # Needed after mocking version attribute + version.MODULE_INFO["adblock"]._reset_cache() + + assert min_version is not None + assert fake_version < min_version + import_fake.version = fake_version + expected = f"adblock: {fake_version} (< {min_version}, outdated)" + assert version._module_versions()[6] == expected + @pytest.mark.parametrize('attribute, expected_modules', [ ('VERSION', ['colorama']), ('SIP_VERSION_STR', ['sip']), @@ -665,12 +698,22 @@ class 
TestModuleVersions: expected: The expected return value. """ import_fake.version_attribute = attribute + + for mod_info in version.MODULE_INFO.values(): + # Invalidate the "version cache" since we just mocked some of the + # attributes. + mod_info._reset_cache() + expected = [] for name in import_fake.modules: + mod_info = version.MODULE_INFO[name] if name in expected_modules: + assert mod_info.get_version() == "1.2.3" expected.append('{}: 1.2.3'.format(name)) else: + assert mod_info.get_version() is None expected.append('{}: yes'.format(name)) + assert version._module_versions() == expected @pytest.mark.parametrize('name, has_version', [ @@ -680,6 +723,7 @@ class TestModuleVersions: ('jinja2', True), ('pygments', True), ('yaml', True), + ('adblock', True), ('attr', True), ]) def test_existing_attributes(self, name, has_version): |