summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Florian Bruhin <git@the-compiler.org> 2017-03-07 21:44:32 +0100
committer Florian Bruhin <git@the-compiler.org> 2017-03-07 22:06:29 +0100
commit 1f850b8de94a467d42e030534d65557735209c02 (patch)
tree cc223a685edeb79dbdd9b4236d9f193cd5a8a919
parent 40d3679073d41e4353f53022dd0e1a2969b9ae18 (diff)
download qutebrowser-1f850b8de94a467d42e030534d65557735209c02.tar.gz
qutebrowser-1f850b8de94a467d42e030534d65557735209c02.zip
Refactor adblock parsing
(cherry picked from commit 4c3c86081fedef7b5e9192f55cb29e8774c965cf)
-rw-r--r-- qutebrowser/browser/adblock.py 85
1 files changed, 50 insertions, 35 deletions
diff --git a/qutebrowser/browser/adblock.py b/qutebrowser/browser/adblock.py
index 6e2adf923..53e3165ca 100644
--- a/qutebrowser/browser/adblock.py
+++ b/qutebrowser/browser/adblock.py
@@ -205,6 +205,54 @@ class HostBlocker:
download.finished.connect(
functools.partial(self.on_download_finished, download))
+ def _parse_line(self, line):
+ """Parse a line from a host file.
+
+ Args:
+ line: The bytes object to parse.
+
+ Returns:
+ True if parsing succeeded, False otherwise.
+ """
+ if line.startswith(b'#'):
+ # Ignoring comments early so we don't have to care about
+ # encoding errors in them.
+ return True
+
+ try:
+ line = line.decode('utf-8')
+ except UnicodeDecodeError:
+ log.misc.error("Failed to decode: {!r}".format(line))
+ return False
+
+ # Remove comments
+ try:
+ hash_idx = line.index('#')
+ line = line[:hash_idx]
+ except ValueError:
+ pass
+
+ line = line.strip()
+ # Skip empty lines
+ if not line:
+ return True
+
+ parts = line.split()
+ if len(parts) == 1:
+ # "one host per line" format
+ host = parts[0]
+ elif len(parts) == 2:
+ # /etc/hosts format
+ host = parts[1]
+ else:
+ log.misc.error("Failed to parse: {!r}".format(line))
+ return False
+
+ if host not in self.WHITELISTED:
+ self._blocked_hosts.add(host)
+
+ return True
+
def _merge_file(self, byte_io):
"""Read and merge host files.
@@ -225,43 +273,10 @@ class HostBlocker:
return
for line in f:
- if line.startswith(b'#'):
- # Ignoring comments early so we don't have to care about
- # encoding errors in them.
- continue
-
line_count += 1
-
- try:
- line = line.decode('utf-8')
- except UnicodeDecodeError:
- log.misc.error("Failed to decode: {!r}".format(line))
- error_count += 1
- continue
-
- # Remove comments
- try:
- hash_idx = line.index('#')
- line = line[:hash_idx]
- except ValueError:
- pass
- line = line.strip()
- # Skip empty lines
- if not line:
- continue
- parts = line.split()
- if len(parts) == 1:
- # "one host per line" format
- host = parts[0]
- elif len(parts) == 2:
- # /etc/hosts format
- host = parts[1]
- else:
+ ok = self._parse_line(line)
+ if not ok:
error_count += 1
- log.misc.error("Failed to parse: {!r}".format(line))
- continue
- if host not in self.WHITELISTED:
- self._blocked_hosts.add(host)
log.misc.debug("{}: read {} lines".format(byte_io.name, line_count))
if error_count > 0: