diff options
author | Florian Bruhin <git@the-compiler.org> | 2017-03-07 21:44:32 +0100 |
---|---|---|
committer | Florian Bruhin <git@the-compiler.org> | 2017-03-07 22:06:29 +0100 |
commit | 1f850b8de94a467d42e030534d65557735209c02 (patch) | |
tree | cc223a685edeb79dbdd9b4236d9f193cd5a8a919 | |
parent | 40d3679073d41e4353f53022dd0e1a2969b9ae18 (diff) | |
download | qutebrowser-1f850b8de94a467d42e030534d65557735209c02.tar.gz qutebrowser-1f850b8de94a467d42e030534d65557735209c02.zip |
Refactor adblock parsing
(cherry picked from commit 4c3c86081fedef7b5e9192f55cb29e8774c965cf)
-rw-r--r-- | qutebrowser/browser/adblock.py | 85 |
1 files changed, 50 insertions, 35 deletions
```diff
diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py
index 6e2adf923..53e3165ca 100644
--- a/qutebrowser/browser/adblock.py
+++ b/qutebrowser/browser/adblock.py
@@ -205,6 +205,54 @@ class HostBlocker:
                 download.finished.connect(
                     functools.partial(self.on_download_finished, download))
 
+    def _parse_line(self, line):
+        """Parse a line from a host file.
+
+        Args:
+            line: The bytes object to parse.
+
+        Returns:
+            True if parsing succeeded, False otherwise.
+        """
+        if line.startswith(b'#'):
+            # Ignoring comments early so we don't have to care about
+            # encoding errors in them.
+            return True
+
+        try:
+            line = line.decode('utf-8')
+        except UnicodeDecodeError:
+            log.misc.error("Failed to decode: {!r}".format(line))
+            return False
+
+        # Remove comments
+        try:
+            hash_idx = line.index('#')
+            line = line[:hash_idx]
+        except ValueError:
+            pass
+
+        line = line.strip()
+        # Skip empty lines
+        if not line:
+            return True
+
+        parts = line.split()
+        if len(parts) == 1:
+            # "one host per line" format
+            host = parts[0]
+        elif len(parts) == 2:
+            # /etc/hosts format
+            host = parts[1]
+        else:
+            log.misc.error("Failed to parse: {!r}".format(line))
+            return False
+
+        if host not in self.WHITELISTED:
+            self._blocked_hosts.add(host)
+
+        return True
+
     def _merge_file(self, byte_io):
         """Read and merge host files.
 
@@ -225,43 +273,10 @@ class HostBlocker:
             return
 
         for line in f:
-            if line.startswith(b'#'):
-                # Ignoring comments early so we don't have to care about
-                # encoding errors in them.
-                continue
-
             line_count += 1
-
-            try:
-                line = line.decode('utf-8')
-            except UnicodeDecodeError:
-                log.misc.error("Failed to decode: {!r}".format(line))
-                error_count += 1
-                continue
-
-            # Remove comments
-            try:
-                hash_idx = line.index('#')
-                line = line[:hash_idx]
-            except ValueError:
-                pass
-            line = line.strip()
-            # Skip empty lines
-            if not line:
-                continue
-            parts = line.split()
-            if len(parts) == 1:
-                # "one host per line" format
-                host = parts[0]
-            elif len(parts) == 2:
-                # /etc/hosts format
-                host = parts[1]
-            else:
+            ok = self._parse_line(line)
+            if not ok:
                 error_count += 1
-                log.misc.error("Failed to parse: {!r}".format(line))
-                continue
-            if host not in self.WHITELISTED:
-                self._blocked_hosts.add(host)
 
         log.misc.debug("{}: read {} lines".format(byte_io.name, line_count))
         if error_count > 0:
```