summaryrefslogtreecommitdiff
path: root/scripts/maint/updateFallbackDirs.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-xscripts/maint/updateFallbackDirs.py550
1 files changed, 510 insertions, 40 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 793ec7d924..4cfee5ddb5 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -2,9 +2,12 @@
# Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
# Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
+# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
+# for netblock analysis, in PYTHONPATH, or just
+# ln -s ../py2-ipaddress-3.4.1/ipaddress.py .
#
-# Then read the generated list to ensure no-one slipped anything funny into
-# their name or contactinfo
+# Then read the logs to make sure the fallbacks aren't dominated by a single
+# netblock or port
# Script by weasel, April 2015
# Portions by gsathya & karsten, 2013
@@ -34,6 +37,21 @@ import logging
# INFO tells you why each relay was included or excluded
# WARN tells you about potential misconfigurations
logging.basicConfig(level=logging.WARNING)
+logging.root.name = ''
+# INFO tells you about each consensus download attempt
+logging.getLogger('stem').setLevel(logging.WARNING)
+
+HAVE_IPADDRESS = False
+try:
+ # python 3 builtin, or install package py2-ipaddress
+ # there are several ipaddress implementations for python 2
+ # with slightly different semantics with str typed text
+ # fortunately, all our IP addresses are in unicode
+ import ipaddress
+ HAVE_IPADDRESS = True
+except ImportError:
+ # if this happens, we avoid doing netblock analysis
+ logging.warning('Unable to import ipaddress, please install py2-ipaddress')
## Top-Level Configuration
@@ -468,6 +486,9 @@ class Candidate(object):
# relays without advertised bandwidth have it calculated from their
# consensus weight
details['advertised_bandwidth'] = 0
+ if (not 'effective_family' in details
+ or details['effective_family'] is None):
+ details['effective_family'] = []
details['last_changed_address_or_port'] = parse_ts(
details['last_changed_address_or_port'])
self._data = details
@@ -480,7 +501,7 @@ class Candidate(object):
if self.orport is None:
raise Exception("Failed to get an orport for %s."%(self._fpr,))
self._compute_ipv6addr()
- if self.ipv6addr is None:
+ if not self.has_ipv6():
logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
def _stable_sort_or_addresses(self):
@@ -584,14 +605,14 @@ class Candidate(object):
(ipaddr, port) = i.rsplit(':', 1)
if (port == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
self.ipv6addr = ipaddr
- self.ipv6orport = port
+ self.ipv6orport = int(port)
return
# Choose the first IPv6 address in the list
for i in self._data['or_addresses']:
(ipaddr, port) = i.rsplit(':', 1)
if Candidate.is_valid_ipv6_address(ipaddr):
self.ipv6addr = ipaddr
- self.ipv6orport = port
+ self.ipv6orport = int(port)
return
@staticmethod
@@ -804,9 +825,10 @@ class Candidate(object):
'ORPort (%d) does not match entry ORPort (%d)',
self._fpr, self.orport, int(entry['orport']))
continue
- has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
- if (entry.has_key('ipv6') and has_ipv6):
- ipv6 = self.ipv6addr + ':' + self.ipv6orport
+ ipv6 = None
+ if self.has_ipv6():
+ ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+ if entry.has_key('ipv6') and self.has_ipv6():
# if both entry and fallback have an ipv6 address, compare them
if entry['ipv6'] != ipv6:
logging.info('%s is not in the whitelist: fingerprint matches, ' +
@@ -815,14 +837,14 @@ class Candidate(object):
continue
# if the fallback has an IPv6 address but the whitelist entry
# doesn't, or vice versa, the whitelist entry doesn't match
- elif entry.has_key('ipv6') and not has_ipv6:
+ elif entry.has_key('ipv6') and not self.has_ipv6():
logging.info('%s is not in the whitelist: fingerprint matches, but ' +
'it has no IPv6, and entry has IPv6 (%s)', self._fpr,
entry['ipv6'])
logging.warning('%s excluded: has it lost its former IPv6 address %s?',
self._fpr, entry['ipv6'])
continue
- elif not entry.has_key('ipv6') and has_ipv6:
+ elif not entry.has_key('ipv6') and self.has_ipv6():
logging.info('%s is not in the whitelist: fingerprint matches, but ' +
'it has IPv6 (%s), and entry has no IPv6', self._fpr,
ipv6)
@@ -871,9 +893,10 @@ class Candidate(object):
'entry has no DirPort or ORPort', self._fpr,
self.dirip)
return True
- has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
- ipv6 = (self.ipv6addr + ':' + self.ipv6orport) if has_ipv6 else None
- if (key == 'ipv6' and has_ipv6):
+ ipv6 = None
+ if self.has_ipv6():
+ ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+ if (key == 'ipv6' and self.has_ipv6()):
# if both entry and fallback have an ipv6 address, compare them,
# otherwise, disregard ipv6 addresses
if value == ipv6:
@@ -889,18 +912,18 @@ class Candidate(object):
logging.info('%s is in the blacklist: IPv6 (%s) matches, and' +
'entry has no DirPort', self._fpr, ipv6)
return True
- elif (key == 'ipv6' or has_ipv6):
+ elif (key == 'ipv6' or self.has_ipv6()):
# only log if the fingerprint matches but the IPv6 doesn't
if entry.has_key('id') and entry['id'] == self._fpr:
logging.info('%s skipping IPv6 blacklist comparison: relay ' +
'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
- '' if has_ipv6 else ' no',
- (' (' + ipv6 + ')') if has_ipv6 else '',
+ '' if self.has_ipv6() else ' no',
+ (' (' + ipv6 + ')') if self.has_ipv6() else '',
'' if key == 'ipv6' else ' no',
(' (' + value + ')') if key == 'ipv6' else '')
logging.warning('Has %s %s IPv6 address %s?', self._fpr,
- 'gained an' if has_ipv6 else 'lost its former',
- ipv6 if has_ipv6 else value)
+ 'gained an' if self.has_ipv6() else 'lost its former',
+ ipv6 if self.has_ipv6() else value)
return False
def cw_to_bw_factor(self):
@@ -936,6 +959,101 @@ class Candidate(object):
def is_running(self):
return 'Running' in self._data['flags']
# report whether this fallback has a usable IPv6 endpoint:
# both the IPv6 address and the IPv6 ORPort must be set
def has_ipv6(self):
  if self.ipv6addr is None:
    return False
  return self.ipv6orport is not None
+
+ # strip leading and trailing brackets from an IPv6 address
+ # safe to use on non-bracketed IPv6 and on IPv4 addresses
+ # also convert to unicode, and make None appear as ''
+ @staticmethod
+ def strip_ipv6_brackets(ip):
+ if ip is None:
+ return unicode('')
+ if len(ip) < 2:
+ return unicode(ip)
+ if ip[0] == '[' and ip[-1] == ']':
+ return unicode(ip[1:-1])
+ return unicode(ip)
+
# report whether ip_a and ip_b fall inside the same netblock
# mask_bits is the prefix length of the netblock
# accepts IPv4 and IPv6 addresses, with or without IPv6 brackets
# returns False if either address is None
# raises an Exception if the two addresses have different IP versions
# an out-of-range mask_bits is logged and clamped to the valid range
@staticmethod
def netblocks_equal(ip_a, ip_b, mask_bits):
  if ip_a is None or ip_b is None:
    return False
  ip_a = Candidate.strip_ipv6_brackets(ip_a)
  ip_b = Candidate.strip_ipv6_brackets(ip_b)
  addr_a = ipaddress.ip_address(ip_a)
  addr_b = ipaddress.ip_address(ip_b)
  if addr_a.version != addr_b.version:
    raise Exception('Mismatching IP versions in %s and %s'%(ip_a, ip_b))
  longest_prefix = addr_a.max_prefixlen
  if mask_bits > longest_prefix:
    logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
    mask_bits = longest_prefix
  elif mask_bits < 0:
    logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
    mask_bits = 0
  # strict=False lets us build the network from a host address
  netblock = ipaddress.ip_network('%s/%d'%(ip_a, mask_bits), strict=False)
  return addr_b in netblock
+
# compare this fallback's IPv4 address (dirip) with other's IPv4 address
# returns True when both are in the same netblock of mask_bits bits
def ipv4_netblocks_equal(self, other, mask_bits):
  own_ip = self.dirip
  other_ip = other.dirip
  return Candidate.netblocks_equal(own_ip, other_ip, mask_bits)
+
# compare this fallback's IPv6 address (ipv6addr) with other's IPv6 address
# returns True when both are in the same netblock of mask_bits bits
# returns False if either fallback has no IPv6 address
def ipv6_netblocks_equal(self, other, mask_bits):
  if self.has_ipv6() and other.has_ipv6():
    return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr,
                                     mask_bits)
  return False
+
# do this fallback and other listen on the same IPv4 DirPort?
def dirport_equal(self, other):
  mine = self.dirport
  theirs = other.dirport
  return mine == theirs
+
# do this fallback and other listen on the same IPv4 ORPort?
def ipv4_orport_equal(self, other):
  mine = self.orport
  theirs = other.orport
  return mine == theirs
+
# do this fallback and other listen on the same IPv6 ORPort?
# always False when either fallback has no IPv6 address
def ipv6_orport_equal(self, other):
  if self.has_ipv6() and other.has_ipv6():
    return self.ipv6orport == other.ipv6orport
  return False
+
# does this fallback match other on any like-for-like port?
# checks the DirPort, then the IPv4 ORPort, then the IPv6 ORPort
# the IPv6 ORPort is ignored if either fallback has no IPv6 address
def port_equal(self, other):
  if self.dirport_equal(other):
    return True
  if self.ipv4_orport_equal(other):
    return True
  return self.ipv6_orport_equal(other)
+
# return a list of this fallback's ports: DirPort, then IPv4 ORPort,
# then the IPv6 ORPort if there is one and it isn't already in the list
def port_list(self):
  ports = [self.dirport, self.orport]
  if not self.has_ipv6():
    return ports
  if self.ipv6orport not in ports:
    ports.append(self.ipv6orport)
  return ports
+
# does this fallback share a port number with other, regardless of whether
# the port types match?
# For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
# return True
# Returns False when the two port lists have no number in common
def port_shared(self, other):
  own_ports = self.port_list()
  # build other's list once, rather than once per port of ours
  other_ports = other.port_list()
  return any(p in other_ports for p in own_ports)
+
# report how long it takes to download a consensus from dirip:dirport
@staticmethod
def fallback_consensus_download_speed(dirip, dirport, nickname, max_time):
@@ -984,7 +1102,7 @@ class Candidate(object):
self.dirport,
self._data['nickname'],
CONSENSUS_DOWNLOAD_SPEED_MAX)
- if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS:
+ if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
# Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr,
self.dirport,
@@ -1086,9 +1204,8 @@ class Candidate(object):
self.orport,
cleanse_c_string(self._fpr))
s += '\n'
- if self.ipv6addr is not None:
- s += '" ipv6=%s:%s"'%(
- cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport))
+ if self.has_ipv6():
+ s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport)
s += '\n'
s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT)
if comment_string:
@@ -1126,7 +1243,7 @@ class CandidateList(dict):
d = fetch('details',
fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
'consensus_weight,advertised_bandwidth,or_addresses,' +
- 'dir_address,recommended_version,flags'))
+ 'dir_address,recommended_version,flags,effective_family'))
logging.debug('Loading details document done.')
if not 'relays' in d: raise Exception("No relays found in document.")
@@ -1163,19 +1280,19 @@ class CandidateList(dict):
# lowest to highest
# used to find the median cw_to_bw_factor()
def sort_fallbacks_by_cw_to_bw_factor(self):
- self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor(), self.fallbacks)
+ self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
# sort fallbacks by their measured bandwidth, highest to lowest
# calculate_measured_bandwidth before calling this
# this is useful for reviewing candidates in priority order
def sort_fallbacks_by_measured_bandwidth(self):
self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
- self.fallbacks, reverse=True)
+ reverse=True)
# sort fallbacks by their fingerprint, lowest to highest
# this is useful for stable diffs of fallback lists
def sort_fallbacks_by_fingerprint(self):
- self.fallbacks.sort(key=lambda f: self[f]._fpr, self.fallbacks)
+ self.fallbacks.sort(key=lambda f: f._fpr)
@staticmethod
def load_relaylist(file_name):
@@ -1341,6 +1458,91 @@ class CandidateList(dict):
else:
return None
+ # does exclusion_list contain attribute?
+ # if so, return False
+ # if not, return True
+ # if attribute is None or the empty string, always return True
+ @staticmethod
+ def allow(attribute, exclusion_list):
+ if attribute is None or attribute == '':
+ return True
+ elif attribute in exclusion_list:
+ return False
+ else:
+ return True
+
# make sure there is only one fallback per IPv4 address, and per IPv6 address
# there is only one IPv4 address on each fallback: the IPv4 DirPort address
# (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
# there is at most one IPv6 address on each fallback: the IPv6 ORPort address
# we try to match the IPv4 ORPort, but will use any IPv6 address if needed
# (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but
# typically only use the IPv6 ORPort)
# if there is no IPv6 address, only the IPv4 address is checked
# fallbacks are considered in list order, so earlier entries win
# replaces self.fallbacks with the surviving fallbacks
# return the number of candidates we excluded
def limit_fallbacks_same_ip(self):
  # a set gives constant-time membership tests; scanning a list for every
  # fallback made this loop quadratic
  seen_ips = set()

  # same rule as CandidateList.allow(): None and '' are never "taken"
  def taken(ip):
    return ip is not None and ip != '' and ip in seen_ips

  kept = []
  for f in self.fallbacks:
    if taken(f.dirip):
      logging.debug('Eliminated %s: already have fallback on IPv4 %s',
                    f._fpr, f.dirip)
    elif taken(f.ipv6addr):
      # only report the IPv6 clash when the fallback has a full IPv6
      # address and ORPort; it is excluded either way
      if f.has_ipv6():
        logging.debug('Eliminated %s: already have fallback on IPv6 %s',
                      f._fpr, f.ipv6addr)
    else:
      kept.append(f)
      seen_ips.add(f.dirip)
      if f.has_ipv6():
        seen_ips.add(f.ipv6addr)
  original_count = len(self.fallbacks)
  self.fallbacks = kept
  return original_count - len(kept)
+
# make sure there is only one fallback per ContactInfo
# if there is no ContactInfo (None or ''), allow the fallback
# this check can be gamed by providing no ContactInfo, or by setting the
# ContactInfo to match another fallback
# However, given the likelihood that relays with the same ContactInfo will
# go down at similar times, its usefulness outweighs the risk
# fallbacks are considered in list order, so earlier entries win
# replaces self.fallbacks with the surviving fallbacks
# return the number of candidates we excluded
def limit_fallbacks_same_contact(self):
  # a set gives constant-time membership tests; scanning a list for every
  # fallback made this loop quadratic
  seen_contacts = set()
  kept = []
  for f in self.fallbacks:
    contact = f._data['contact']
    # same rule as CandidateList.allow(): None and '' are always allowed
    if contact is None or contact == '' or contact not in seen_contacts:
      kept.append(f)
      seen_contacts.add(contact)
    else:
      logging.debug('Eliminated %s: already have fallback on ' +
                    'ContactInfo %s', f._fpr, contact)
  original_count = len(self.fallbacks)
  self.fallbacks = kept
  return original_count - len(kept)
+
# make sure there is only one fallback per effective family
# if there is no family, allow the fallback
# this check can't be gamed, because we use effective family, which ensures
# mutual family declarations
# if any indirect families exist, the result depends on the order in which
# fallbacks are sorted in the list
# replaces self.fallbacks with the surviving fallbacks
# return the number of candidates we excluded
def limit_fallbacks_same_family(self):
  # fingerprints of every kept fallback, plus everything in their
  # effective families; a set keeps the membership test constant-time
  seen_fingerprints = set()
  kept = []
  for f in self.fallbacks:
    fpr = f._fpr
    # same rule as CandidateList.allow(): None and '' are always allowed
    if fpr is None or fpr == '' or fpr not in seen_fingerprints:
      kept.append(f)
      seen_fingerprints.add(fpr)
      seen_fingerprints.update(f._data['effective_family'])
    else:
      # technically, we already have a fallback with this fallback in its
      # effective family
      # pass the fingerprint as a logging argument: formatting with
      # 'a' + 'b'%(x) applies % to 'b' alone and raises TypeError
      logging.debug('Eliminated %s: already have fallback in effective ' +
                    'family', fpr)
  original_count = len(self.fallbacks)
  self.fallbacks = kept
  return original_count - len(kept)
+
# try a download check on each fallback candidate in order
# stop after max_count successful downloads
# but don't remove any candidates from the array
@@ -1361,6 +1563,7 @@ class CandidateList(dict):
# - eliminate failed candidates
# - if there are more than max_count candidates, eliminate lowest bandwidth
# - if there are fewer than max_count candidates, leave only successful
+ # Return the number of fallbacks that failed the consensus check
def perform_download_consensus_checks(self, max_count):
self.sort_fallbacks_by_measured_bandwidth()
self.try_download_consensus_checks(max_count)
@@ -1370,12 +1573,245 @@ class CandidateList(dict):
self.try_download_consensus_checks(max_count)
# now we have at least max_count successful candidates,
# or we've tried them all
+ original_count = len(self.fallbacks)
self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
self.fallbacks)
+ # some of these failed the check, others skipped the check,
+ # if we already had enough successful downloads
+ failed_count = original_count - len(self.fallbacks)
self.fallbacks = self.fallbacks[:max_count]
+ return failed_count
+
+ # return a string that describes a/b as a percentage
+ @staticmethod
+ def describe_percentage(a, b):
+ return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b)
+
# group the fallbacks by IPv4 netblock, where mask_bits is the size of the
# netblock
# returns a dictionary of lists of fallbacks; each key is the fingerprint
# of the first fallback that was placed in that netblock
def fallbacks_by_ipv4_netblock(self, mask_bits):
  netblocks = {}
  for f in self.fallbacks:
    for b in netblocks:
      # join the first existing netblock that contains this fallback
      if f.ipv4_netblocks_equal(self[b], mask_bits):
        netblocks[b].append(f)
        break
    else:
      # no existing netblock matched: start a new one under this
      # fallback's fingerprint
      netblocks[f._fpr] = [f]
  return netblocks
+
# group the fallbacks that have IPv6 by IPv6 netblock, where mask_bits is
# the size of the netblock
# fallbacks without an IPv6 address are left out
# returns a dictionary of lists of fallbacks; each key is the fingerprint
# of the first fallback that was placed in that netblock
def fallbacks_by_ipv6_netblock(self, mask_bits):
  netblocks = {}
  for f in self.fallbacks:
    if not f.has_ipv6():
      continue
    for b in netblocks:
      # join the first existing netblock that contains this fallback
      if f.ipv6_netblocks_equal(self[b], mask_bits):
        netblocks[b].append(f)
        break
    else:
      # no existing netblock matched: start a new one under this
      # fallback's fingerprint
      netblocks[f._fpr] = [f]
  return netblocks
+
# log how concentrated the fallbacks are in IPv4 netblocks, where
# mask_bits is the size of the netblock
# logs the members of every shared netblock at debug level, then warns
# about the most populated netblock and about the overall share of
# fallbacks that have a neighbour in their netblock
def describe_fallback_ipv4_netblock_mask(self, mask_bits):
  total = len(self.fallbacks)
  shared_total = 0
  busiest = None
  netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
  for key, members in netblocks.items():
    # netblocks with a single fallback aren't interesting
    if len(members) <= 1:
      continue
    shared_total += len(members)
    if busiest is None or len(members) > len(netblocks[busiest]):
      busiest = key
    logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
    for f in members:
      logging.debug('%s - %s', f.dirip, f._fpr)
  if busiest is not None:
    busiest_share = CandidateList.describe_percentage(
                      len(netblocks[busiest]), total)
    logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
                    busiest_share, mask_bits, self[busiest].dirip))
  if shared_total > 0:
    shared_share = CandidateList.describe_percentage(shared_total, total)
    logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
                     'fallbacks')%(shared_share, mask_bits))
+
# log how concentrated the IPv6 fallbacks are in IPv6 netblocks, where
# mask_bits is the size of the netblock
# percentages are relative to the number of fallbacks that have IPv6
# logs the members of every shared netblock at debug level, then warns
# about the most populated netblock and about the overall share of
# fallbacks that have a neighbour in their netblock
def describe_fallback_ipv6_netblock_mask(self, mask_bits):
  total = len(self.fallbacks_with_ipv6())
  shared_total = 0
  busiest = None
  netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
  for key, members in netblocks.items():
    # netblocks with a single fallback aren't interesting
    if len(members) <= 1:
      continue
    shared_total += len(members)
    if busiest is None or len(members) > len(netblocks[busiest]):
      busiest = key
    logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
    for f in members:
      logging.debug('%s - %s', f.ipv6addr, f._fpr)
  if busiest is not None:
    busiest_share = CandidateList.describe_percentage(
                      len(netblocks[busiest]), total)
    logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
                    busiest_share, mask_bits, self[busiest].ipv6addr))
  if shared_total > 0:
    shared_share = CandidateList.describe_percentage(shared_total, total)
    logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
                     'fallbacks')%(shared_share, mask_bits))
+
# log a message about the proportion of fallbacks in each IPv4 /16
# and /24
# (/8 is deliberately left out: it doesn't tell us anything useful)
def describe_fallback_ipv4_netblocks(self):
  for mask_bits in (16, 24):
    self.describe_fallback_ipv4_netblock_mask(mask_bits)
+
# log a message about the proportion of fallbacks in each IPv6 /32 (LIR),
# /48 (Customer), and /64 (Host)
# (/12 (RIR) and /23 (smaller RIR blocks) are deliberately left out: they
# don't tell us anything useful)
# https://www.iana.org/assignments/ipv6-unicast-address-assignments/
def describe_fallback_ipv6_netblocks(self):
  for mask_bits in (32, 48, 64):
    self.describe_fallback_ipv6_netblock_mask(mask_bits)
+
# log the netblock concentration of the fallbacks:
# IPv4 netblocks first, then IPv6 netblocks
def describe_fallback_netblocks(self):
  for describe in (self.describe_fallback_ipv4_netblocks,
                   self.describe_fallback_ipv6_netblocks):
    describe()
+
# return a list of the fallbacks whose IPv4 ORPort is port
# always a real list (filter() is only a list on python 2), so callers
# can take its len()
def fallbacks_on_ipv4_orport(self, port):
  return [f for f in self.fallbacks if f.orport == port]
+
# return a list of the fallbacks with IPv6 whose IPv6 ORPort is port
# always a real list (filter() is only a list on python 2), so callers
# can take its len()
def fallbacks_on_ipv6_orport(self, port):
  return [f for f in self.fallbacks_with_ipv6() if f.ipv6orport == port]
+
# return a list of the fallbacks whose DirPort is port
# always a real list (filter() is only a list on python 2), so callers
# can take its len()
def fallbacks_on_dirport(self, port):
  return [f for f in self.fallbacks if f.dirport == port]
+
# warn about the proportion of all fallbacks that use port as their
# IPv4 ORPort
# returns the number of fallbacks on that port
def describe_fallback_ipv4_orport(self, port):
  on_port = len(self.fallbacks_on_ipv4_orport(port))
  total = len(self.fallbacks)
  share = CandidateList.describe_percentage(on_port, total)
  logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(share, port))
  return on_port
+
# warn about the proportion of IPv6 fallbacks that use port as their
# IPv6 ORPort
# returns the number of fallbacks on that port
def describe_fallback_ipv6_orport(self, port):
  on_port = len(self.fallbacks_on_ipv6_orport(port))
  total = len(self.fallbacks_with_ipv6())
  share = CandidateList.describe_percentage(on_port, total)
  logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(share, port))
  return on_port
+
# warn about the proportion of all fallbacks that use port as their DirPort
# returns the number of fallbacks on that port
def describe_fallback_dirport(self, port):
  on_port = len(self.fallbacks_on_dirport(port))
  total = len(self.fallbacks)
  share = CandidateList.describe_percentage(on_port, total)
  logging.warning('%s of fallbacks are on DirPort %d'%(share, port))
  return on_port
+
# warn about how the fallbacks are spread over ports:
# IPv4 ORPorts 443 and 9001, IPv6 ORPorts 443 and 9001, and DirPorts 80
# and 9030, each followed by the share on any other port
# IPv6 shares are relative to the number of fallbacks that have IPv6
def describe_fallback_ports(self):
  total = len(self.fallbacks)

  # IPv4 ORPorts: whatever isn't on a well-known port is "other"
  other_ipv4_or = total
  for port in (443, 9001):
    other_ipv4_or -= self.describe_fallback_ipv4_orport(port)
  logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
                  CandidateList.describe_percentage(other_ipv4_or, total)))

  # IPv6 ORPorts
  ipv6_total = len(self.fallbacks_with_ipv6())
  other_ipv6_or = ipv6_total
  for port in (443, 9001):
    other_ipv6_or -= self.describe_fallback_ipv6_orport(port)
  logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
                  CandidateList.describe_percentage(other_ipv6_or,
                                                    ipv6_total)))

  # DirPorts
  other_dir = total
  for port in (80, 9030):
    other_dir -= self.describe_fallback_dirport(port)
  logging.warning('%s of fallbacks are on other DirPorts'%(
                  CandidateList.describe_percentage(other_dir, total)))
+
# return a list of the fallbacks which have the Exit flag
# always a real list (filter() is only a list on python 2), so callers
# can take its len()
def fallbacks_with_exit(self):
  return [f for f in self.fallbacks if f.is_exit()]
+
# warn about the proportion of fallbacks that have the Exit flag
def describe_fallback_exit_flag(self):
  exit_count = len(self.fallbacks_with_exit())
  total = len(self.fallbacks)
  share = CandidateList.describe_percentage(exit_count, total)
  logging.warning('%s of fallbacks have the Exit flag'%(share))
+
# return a list of the fallbacks which have an IPv6 address and ORPort
# always a real list (filter() is only a list on python 2), so callers
# can take its len()
def fallbacks_with_ipv6(self):
  return [f for f in self.fallbacks if f.has_ipv6()]
+
# warn about the proportion of fallbacks that have an IPv6 address
def describe_fallback_ip_family(self):
  ipv6_count = len(self.fallbacks_with_ipv6())
  total = len(self.fallbacks)
  share = CandidateList.describe_percentage(ipv6_count, total)
  logging.warning('%s of fallbacks are on IPv6'%(share))
- def summarise_fallbacks(self, eligible_count, guard_count, target_count,
- max_count):
+ def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
+ guard_count, target_count):
# Report:
# whether we checked consensus download times
# the number of fallback directories (and limits/exclusions, if relevant)
@@ -1399,17 +1835,23 @@ class CandidateList(dict):
if FALLBACK_PROPORTION_OF_GUARDS is None:
fallback_proportion = ''
else:
- fallback_proportion = ', Target %d (%d * %f)'%(target_count, guard_count,
- FALLBACK_PROPORTION_OF_GUARDS)
- s += 'Final Count: %d (Eligible %d%s'%(fallback_count,
- eligible_count,
+ fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
+ guard_count,
+ FALLBACK_PROPORTION_OF_GUARDS)
+ s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
fallback_proportion)
if MAX_FALLBACK_COUNT is not None:
- s += ', Clamped to %d'%(MAX_FALLBACK_COUNT)
+ s += ', Max %d'%(MAX_FALLBACK_COUNT)
s += ')\n'
if eligible_count != fallback_count:
- s += 'Excluded: %d (Eligible Count Exceeded Target Count)'%(
- eligible_count - fallback_count)
+ removed_count = eligible_count - fallback_count
+ excess_to_target_or_max = (eligible_count - operator_count - failed_count
+ - fallback_count)
+ # some 'Failed' failed the check, others 'Skipped' the check,
+ # if we already had enough successful downloads
+ s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
+ 'Excess %d)')%(removed_count, operator_count, failed_count,
+ excess_to_target_or_max)
s += '\n'
min_fb = self.fallback_min()
min_bw = min_fb._data['measured_bandwidth']
@@ -1473,18 +1915,46 @@ def list_fallbacks():
# print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
# separators=(',', ': '), default=json_util.default)
+ # impose mandatory conditions here, like one per contact, family, IP
+ # in measured bandwidth order
+ candidates.sort_fallbacks_by_measured_bandwidth()
+ operator_count = 0
+ # only impose these limits on the final list - operators can nominate
+ # multiple candidate fallbacks, and then we choose the best set
+ if not OUTPUT_CANDIDATES:
+ operator_count += candidates.limit_fallbacks_same_ip()
+ operator_count += candidates.limit_fallbacks_same_contact()
+ operator_count += candidates.limit_fallbacks_same_family()
+
+ # check if each candidate can serve a consensus
+ # there's a small risk we've eliminated relays from the same operator that
+ # can serve a consensus, in favour of one that can't
+ # but given it takes up to 15 seconds to check each consensus download,
+ # the risk is worth it
+ failed_count = candidates.perform_download_consensus_checks(max_count)
+
+ # analyse and log interesting diversity metrics
+ # like netblock, ports, exit, IPv4-only
+ # (we can't easily analyse AS, and it's hard to accurately analyse country)
+ candidates.describe_fallback_ip_family()
+ # if we can't import the ipaddress module, we can't do netblock analysis
+ if HAVE_IPADDRESS:
+ candidates.describe_fallback_netblocks()
+ candidates.describe_fallback_ports()
+ candidates.describe_fallback_exit_flag()
+
+ # output C comments summarising the fallback selection process
if len(candidates.fallbacks) > 0:
- print candidates.summarise_fallbacks(eligible_count, guard_count,
- target_count, max_count)
+ print candidates.summarise_fallbacks(eligible_count, operator_count,
+ failed_count, guard_count,
+ target_count)
else:
print '/* No Fallbacks met criteria */'
+ # output C comments specifying the OnionOO data used to create the list
for s in fetch_source_list():
print describe_fetch_source(s)
- # check if each candidate can serve a consensus
- candidates.perform_download_consensus_checks(max_count)
-
# if we're outputting the final fallback list, sort by fingerprint
# this makes diffs much more stable
# otherwise, leave sorted by bandwidth, which allows operators to be