diff options
Diffstat (limited to 'scripts/maint')
-rwxr-xr-x | scripts/maint/updateFallbackDirs.py | 550 |
1 files changed, 510 insertions, 40 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 793ec7d924..4cfee5ddb5 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -2,9 +2,12 @@ # Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc # Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem . +# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package) +# for netblock analysis, in PYTHONPATH, or just +# ln -s ../py2-ipaddress-3.4.1/ipaddress.py . # -# Then read the generated list to ensure no-one slipped anything funny into -# their name or contactinfo +# Then read the logs to make sure the fallbacks aren't dominated by a single +# netblock or port # Script by weasel, April 2015 # Portions by gsathya & karsten, 2013 @@ -34,6 +37,21 @@ import logging # INFO tells you why each relay was included or excluded # WARN tells you about potential misconfigurations logging.basicConfig(level=logging.WARNING) +logging.root.name = '' +# INFO tells you about each consensus download attempt +logging.getLogger('stem').setLevel(logging.WARNING) + +HAVE_IPADDRESS = False +try: + # python 3 builtin, or install package py2-ipaddress + # there are several ipaddress implementations for python 2 + # with slightly different semantics with str typed text + # fortunately, all our IP addresses are in unicode + import ipaddress + HAVE_IPADDRESS = True +except ImportError: + # if this happens, we avoid doing netblock analysis + logging.warning('Unable to import ipaddress, please install py2-ipaddress') ## Top-Level Configuration @@ -468,6 +486,9 @@ class Candidate(object): # relays without advertised bandwdith have it calculated from their # consensus weight details['advertised_bandwidth'] = 0 + if (not 'effective_family' in details + or details['effective_family'] is None): + details['effective_family'] = [] details['last_changed_address_or_port'] = parse_ts( details['last_changed_address_or_port']) self._data = details @@ -480,7 +501,7 @@ class Candidate(object): if self.orport is None: raise Exception("Failed to get an orport for %s."%(self._fpr,)) self._compute_ipv6addr() - if self.ipv6addr is None: + if not self.has_ipv6(): logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,)) def _stable_sort_or_addresses(self): @@ -584,14 +605,14 @@ class Candidate(object): (ipaddr, port) = i.rsplit(':', 1) if (port == self.orport) and Candidate.is_valid_ipv6_address(ipaddr): self.ipv6addr = ipaddr - self.ipv6orport = port + self.ipv6orport = int(port) return # Choose the first IPv6 address in the list for i in self._data['or_addresses']: (ipaddr, port) = i.rsplit(':', 1) if Candidate.is_valid_ipv6_address(ipaddr): self.ipv6addr = ipaddr - self.ipv6orport = port + self.ipv6orport = int(port) return @staticmethod @@ -804,9 +825,10 @@ class Candidate(object): 'ORPort (%d) does not match entry ORPort (%d)', self._fpr, self.orport, int(entry['orport'])) continue - has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None - if (entry.has_key('ipv6') and has_ipv6): - ipv6 = self.ipv6addr + ':' + self.ipv6orport + ipv6 = None + if self.has_ipv6(): + ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport) + if entry.has_key('ipv6') and self.has_ipv6(): # if both entry and fallback have an ipv6 address, compare them if entry['ipv6'] != ipv6: logging.info('%s is not in the whitelist: fingerprint matches, ' + @@ -815,14 +837,14 @@ class Candidate(object): continue # if the fallback has an IPv6 address but the whitelist entry # doesn't, or vice versa, the whitelist entry doesn't match - elif entry.has_key('ipv6') and not has_ipv6: + elif entry.has_key('ipv6') and not self.has_ipv6(): logging.info('%s is not in the whitelist: fingerprint matches, but ' + 'it has no IPv6, and entry has IPv6 (%s)', self._fpr, entry['ipv6']) logging.warning('%s excluded: has it lost its former IPv6 address %s?', self._fpr, entry['ipv6']) continue - elif not entry.has_key('ipv6') and has_ipv6: + elif not entry.has_key('ipv6') and self.has_ipv6(): logging.info('%s is not in the whitelist: fingerprint matches, but ' + 'it has IPv6 (%s), and entry has no IPv6', self._fpr, ipv6) @@ -871,9 +893,10 @@ class Candidate(object): 'entry has no DirPort or ORPort', self._fpr, self.dirip) return True - has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None - ipv6 = (self.ipv6addr + ':' + self.ipv6orport) if has_ipv6 else None - if (key == 'ipv6' and has_ipv6): + ipv6 = None + if self.has_ipv6(): + ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport) + if (key == 'ipv6' and self.has_ipv6()): # if both entry and fallback have an ipv6 address, compare them, # otherwise, disregard ipv6 addresses if value == ipv6: @@ -889,18 +912,18 @@ class Candidate(object): logging.info('%s is in the blacklist: IPv6 (%s) matches, and' + 'entry has no DirPort', self._fpr, ipv6) return True - elif (key == 'ipv6' or has_ipv6): + elif (key == 'ipv6' or self.has_ipv6()): # only log if the fingerprint matches but the IPv6 doesn't if entry.has_key('id') and entry['id'] == self._fpr: logging.info('%s skipping IPv6 blacklist comparison: relay ' + 'has%s IPv6%s, but entry has%s IPv6%s', self._fpr, - '' if has_ipv6 else ' no', - (' (' + ipv6 + ')') if has_ipv6 else '', + '' if self.has_ipv6() else ' no', + (' (' + ipv6 + ')') if self.has_ipv6() else '', '' if key == 'ipv6' else ' no', (' (' + value + ')') if key == 'ipv6' else '') logging.warning('Has %s %s IPv6 address %s?', self._fpr, - 'gained an' if has_ipv6 else 'lost its former', - ipv6 if has_ipv6 else value) + 'gained an' if self.has_ipv6() else 'lost its former', + ipv6 if self.has_ipv6() else value) return False def cw_to_bw_factor(self): @@ -936,6 +959,101 @@ class Candidate(object): def is_running(self): return 'Running' in self._data['flags'] + # does this fallback have an IPv6 address and orport? + def has_ipv6(self): + return self.ipv6addr is not None and self.ipv6orport is not None + + # strip leading and trailing brackets from an IPv6 address + # safe to use on non-bracketed IPv6 and on IPv4 addresses + # also convert to unicode, and make None appear as '' + @staticmethod + def strip_ipv6_brackets(ip): + if ip is None: + return unicode('') + if len(ip) < 2: + return unicode(ip) + if ip[0] == '[' and ip[-1] == ']': + return unicode(ip[1:-1]) + return unicode(ip) + + # are ip_a and ip_b in the same netblock? + # mask_bits is the size of the netblock + # takes both IPv4 and IPv6 addresses + # the versions of ip_a and ip_b must be the same + # the mask must be valid for the IP version + @staticmethod + def netblocks_equal(ip_a, ip_b, mask_bits): + if ip_a is None or ip_b is None: + return False + ip_a = Candidate.strip_ipv6_brackets(ip_a) + ip_b = Candidate.strip_ipv6_brackets(ip_b) + a = ipaddress.ip_address(ip_a) + b = ipaddress.ip_address(ip_b) + if a.version != b.version: + raise Exception('Mismatching IP versions in %s and %s'%(ip_a, ip_b)) + if mask_bits > a.max_prefixlen: + logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b)) + mask_bits = a.max_prefixlen + if mask_bits < 0: + logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b)) + mask_bits = 0 + a_net = ipaddress.ip_network('%s/%d'%(ip_a, mask_bits), strict=False) + return b in a_net + + # is this fallback's IPv4 address (dirip) in the same netblock as other's + # IPv4 address? + # mask_bits is the size of the netblock + def ipv4_netblocks_equal(self, other, mask_bits): + return Candidate.netblocks_equal(self.dirip, other.dirip, mask_bits) + + # is this fallback's IPv6 address (ipv6addr) in the same netblock as + # other's IPv6 address? + # Returns False if either fallback has no IPv6 address + # mask_bits is the size of the netblock + def ipv6_netblocks_equal(self, other, mask_bits): + if not self.has_ipv6() or not other.has_ipv6(): + return False + return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits) + + # is this fallback's IPv4 DirPort the same as other's IPv4 DirPort? + def dirport_equal(self, other): + return self.dirport == other.dirport + + # is this fallback's IPv4 ORPort the same as other's IPv4 ORPort? + def ipv4_orport_equal(self, other): + return self.orport == other.orport + + # is this fallback's IPv6 ORPort the same as other's IPv6 ORPort? + # Returns False if either fallback has no IPv6 address + def ipv6_orport_equal(self, other): + if not self.has_ipv6() or not other.has_ipv6(): + return False + return self.ipv6orport == other.ipv6orport + + # does this fallback have the same DirPort, IPv4 ORPort, or + # IPv6 ORPort as other? + # Ignores IPv6 ORPort if either fallback has no IPv6 address + def port_equal(self, other): + return (self.dirport_equal(other) or self.ipv4_orport_equal(other) + or self.ipv6_orport_equal(other)) + + # return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present) + def port_list(self): + ports = [self.dirport, self.orport] + if self.has_ipv6() and not self.ipv6orport in ports: + ports.append(self.ipv6orport) + return ports + + # does this fallback share a port with other, regardless of whether the + # port types match? + # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80, + # return True + def port_shared(self, other): + for p in self.port_list(): + if p in other.port_list(): + return True + return False + # report how long it takes to download a consensus from dirip:dirport @staticmethod def fallback_consensus_download_speed(dirip, dirport, nickname, max_time): @@ -984,7 +1102,7 @@ class Candidate(object): self.dirport, self._data['nickname'], CONSENSUS_DOWNLOAD_SPEED_MAX) - if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS: + if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS: # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr, self.dirport, @@ -1086,9 +1204,8 @@ class Candidate(object): self.orport, cleanse_c_string(self._fpr)) s += '\n' - if self.ipv6addr is not None: - s += '" ipv6=%s:%s"'%( - cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport)) + if self.has_ipv6(): + s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport) s += '\n' s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT) if comment_string: @@ -1126,7 +1243,7 @@ class CandidateList(dict): d = fetch('details', fields=('fingerprint,nickname,contact,last_changed_address_or_port,' + 'consensus_weight,advertised_bandwidth,or_addresses,' + - 'dir_address,recommended_version,flags')) + 'dir_address,recommended_version,flags,effective_family')) logging.debug('Loading details document done.') if not 'relays' in d: raise Exception("No relays found in document.") @@ -1163,19 +1280,19 @@ class CandidateList(dict): # lowest to highest # used to find the median cw_to_bw_factor() def sort_fallbacks_by_cw_to_bw_factor(self): - self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor(), self.fallbacks) + self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor()) # sort fallbacks by their measured bandwidth, highest to lowest # calculate_measured_bandwidth before calling this # this is useful for reviewing candidates in priority order def sort_fallbacks_by_measured_bandwidth(self): self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'], - self.fallbacks, reverse=True) + reverse=True) # sort fallbacks by their fingerprint, lowest to highest # this is useful for stable diffs of fallback lists def sort_fallbacks_by_fingerprint(self): - self.fallbacks.sort(key=lambda f: self[f]._fpr, self.fallbacks) + self.fallbacks.sort(key=lambda f: f._fpr) @staticmethod def load_relaylist(file_name): @@ -1341,6 +1458,91 @@ class CandidateList(dict): else: return None + # does exclusion_list contain attribute? + # if so, return False + # if not, return True + # if attribute is None or the empty string, always return True + @staticmethod + def allow(attribute, exclusion_list): + if attribute is None or attribute == '': + return True + elif attribute in exclusion_list: + return False + else: + return True + + # make sure there is only one fallback per IPv4 address, and per IPv6 address + # there is only one IPv4 address on each fallback: the IPv4 DirPort address + # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort) + # there is at most one IPv6 address on each fallback: the IPv6 ORPort address + # we try to match the IPv4 ORPort, but will use any IPv6 address if needed + # (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but + # typically only use the IPv6 ORPort) + # if there is no IPv6 address, only the IPv4 address is checked + # return the number of candidates we excluded + def limit_fallbacks_same_ip(self): + ip_limit_fallbacks = [] + ip_list = [] + for f in self.fallbacks: + if (CandidateList.allow(f.dirip, ip_list) + and CandidateList.allow(f.ipv6addr, ip_list)): + ip_limit_fallbacks.append(f) + ip_list.append(f.dirip) + if f.has_ipv6(): + ip_list.append(f.ipv6addr) + elif not CandidateList.allow(f.dirip, ip_list): + logging.debug('Eliminated %s: already have fallback on IPv4 %s'%( + f._fpr, f.dirip)) + elif f.has_ipv6() and not CandidateList.allow(f.ipv6addr, ip_list): + logging.debug('Eliminated %s: already have fallback on IPv6 %s'%( + f._fpr, f.ipv6addr)) + original_count = len(self.fallbacks) + self.fallbacks = ip_limit_fallbacks + return original_count - len(self.fallbacks) + + # make sure there is only one fallback per ContactInfo + # if there is no ContactInfo, allow the fallback + # this check can be gamed by providing no ContactInfo, or by setting the + # ContactInfo to match another fallback + # However, given the likelihood that relays with the same ContactInfo will + # go down at similar times, its usefulness outweighs the risk + def limit_fallbacks_same_contact(self): + contact_limit_fallbacks = [] + contact_list = [] + for f in self.fallbacks: + if CandidateList.allow(f._data['contact'], contact_list): + contact_limit_fallbacks.append(f) + contact_list.append(f._data['contact']) + else: + logging.debug(('Eliminated %s: already have fallback on ' + + 'ContactInfo %s')%(f._fpr, f._data['contact'])) + original_count = len(self.fallbacks) + self.fallbacks = contact_limit_fallbacks + return original_count - len(self.fallbacks) + + # make sure there is only one fallback per effective family + # if there is no family, allow the fallback + # this check can't be gamed, because we use effective family, which ensures + # mutual family declarations + # if any indirect families exist, the result depends on the order in which + # fallbacks are sorted in the list + def limit_fallbacks_same_family(self): + family_limit_fallbacks = [] + fingerprint_list = [] + for f in self.fallbacks: + if CandidateList.allow(f._fpr, fingerprint_list): + family_limit_fallbacks.append(f) + fingerprint_list.append(f._fpr) + fingerprint_list.extend(f._data['effective_family']) + else: + # technically, we already have a fallback with this fallback in its + # effective family + logging.debug('Eliminated %s: already have fallback in effective ' + + 'family'%(f._fpr)) + original_count = len(self.fallbacks) + self.fallbacks = family_limit_fallbacks + return original_count - len(self.fallbacks) + # try a download check on each fallback candidate in order # stop after max_count successful downloads # but don't remove any candidates from the array @@ -1361,6 +1563,7 @@ class CandidateList(dict): # - eliminate failed candidates # - if there are more than max_count candidates, eliminate lowest bandwidth # - if there are fewer than max_count candidates, leave only successful + # Return the number of fallbacks that failed the consensus check def perform_download_consensus_checks(self, max_count): self.sort_fallbacks_by_measured_bandwidth() self.try_download_consensus_checks(max_count) @@ -1370,12 +1573,245 @@ class CandidateList(dict): self.try_download_consensus_checks(max_count) # now we have at least max_count successful candidates, # or we've tried them all + original_count = len(self.fallbacks) self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(), self.fallbacks) + # some of these failed the check, others skipped the check, + # if we already had enough successful downloads + failed_count = original_count - len(self.fallbacks) self.fallbacks = self.fallbacks[:max_count] + return failed_count + + # return a string that describes a/b as a percentage + @staticmethod + def describe_percentage(a, b): + return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b) + + # return a dictionary of lists of fallbacks by IPv4 netblock + # the dictionary is keyed by the fingerprint of an arbitrary fallback + # in each netblock + # mask_bits is the size of the netblock + def fallbacks_by_ipv4_netblock(self, mask_bits): + netblocks = {} + for f in self.fallbacks: + found_netblock = False + for b in netblocks.keys(): + # we found an existing netblock containing this fallback + if f.ipv4_netblocks_equal(self[b], mask_bits): + # add it to the list + netblocks[b].append(f) + found_netblock = True + break + # make a new netblock based on this fallback's fingerprint + if not found_netblock: + netblocks[f._fpr] = [f] + return netblocks + + # return a dictionary of lists of fallbacks by IPv6 netblock + # where mask_bits is the size of the netblock + def fallbacks_by_ipv6_netblock(self, mask_bits): + netblocks = {} + for f in self.fallbacks: + # skip fallbacks without IPv6 addresses + if not f.has_ipv6(): + continue + found_netblock = False + for b in netblocks.keys(): + # we found an existing netblock containing this fallback + if f.ipv6_netblocks_equal(self[b], mask_bits): + # add it to the list + netblocks[b].append(f) + found_netblock = True + break + # make a new netblock based on this fallback's fingerprint + if not found_netblock: + netblocks[f._fpr] = [f] + return netblocks + + # log a message about the proportion of fallbacks in each IPv4 netblock, + # where mask_bits is the size of the netblock + def describe_fallback_ipv4_netblock_mask(self, mask_bits): + fallback_count = len(self.fallbacks) + shared_netblock_fallback_count = 0 + most_frequent_netblock = None + netblocks = self.fallbacks_by_ipv4_netblock(mask_bits) + for b in netblocks.keys(): + if len(netblocks[b]) > 1: + # how many fallbacks are in a netblock with other fallbacks? + shared_netblock_fallback_count += len(netblocks[b]) + # what's the netblock with the most fallbacks? + if (most_frequent_netblock is None + or len(netblocks[b]) > len(netblocks[most_frequent_netblock])): + most_frequent_netblock = b + logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits)) + for f in netblocks[b]: + logging.debug('%s - %s', f.dirip, f._fpr) + if most_frequent_netblock is not None: + logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%( + CandidateList.describe_percentage( + len(netblocks[most_frequent_netblock]), + fallback_count), + mask_bits, + self[most_frequent_netblock].dirip)) + if shared_netblock_fallback_count > 0: + logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' + + 'fallbacks')%(CandidateList.describe_percentage( + shared_netblock_fallback_count, + fallback_count), + mask_bits)) + + # log a message about the proportion of fallbacks in each IPv6 netblock, + # where mask_bits is the size of the netblock + def describe_fallback_ipv6_netblock_mask(self, mask_bits): + fallback_count = len(self.fallbacks_with_ipv6()) + shared_netblock_fallback_count = 0 + most_frequent_netblock = None + netblocks = self.fallbacks_by_ipv6_netblock(mask_bits) + for b in netblocks.keys(): + if len(netblocks[b]) > 1: + # how many fallbacks are in a netblock with other fallbacks? + shared_netblock_fallback_count += len(netblocks[b]) + # what's the netblock with the most fallbacks? + if (most_frequent_netblock is None + or len(netblocks[b]) > len(netblocks[most_frequent_netblock])): + most_frequent_netblock = b + logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits)) + for f in netblocks[b]: + logging.debug('%s - %s', f.ipv6addr, f._fpr) + if most_frequent_netblock is not None: + logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%( + CandidateList.describe_percentage( + len(netblocks[most_frequent_netblock]), + fallback_count), + mask_bits, + self[most_frequent_netblock].ipv6addr)) + if shared_netblock_fallback_count > 0: + logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' + + 'fallbacks')%(CandidateList.describe_percentage( + shared_netblock_fallback_count, + fallback_count), + mask_bits)) + + # log a message about the proportion of fallbacks in each IPv4 /8, /16, + # and /24 + def describe_fallback_ipv4_netblocks(self): + # this doesn't actually tell us anything useful + #self.describe_fallback_ipv4_netblock_mask(8) + self.describe_fallback_ipv4_netblock_mask(16) + self.describe_fallback_ipv4_netblock_mask(24) + + # log a message about the proportion of fallbacks in each IPv6 /12 (RIR), + # /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host) + # https://www.iana.org/assignments/ipv6-unicast-address-assignments/ + def describe_fallback_ipv6_netblocks(self): + # these don't actually tell us anything useful + #self.describe_fallback_ipv6_netblock_mask(12) + #self.describe_fallback_ipv6_netblock_mask(23) + self.describe_fallback_ipv6_netblock_mask(32) + self.describe_fallback_ipv6_netblock_mask(48) + self.describe_fallback_ipv6_netblock_mask(64) + + # log a message about the proportion of fallbacks in each IPv4 and IPv6 + # netblock + def describe_fallback_netblocks(self): + self.describe_fallback_ipv4_netblocks() + self.describe_fallback_ipv6_netblocks() + + # return a list of fallbacks which are on the IPv4 ORPort port + def fallbacks_on_ipv4_orport(self, port): + return filter(lambda x: x.orport == port, self.fallbacks) + + # return a list of fallbacks which are on the IPv6 ORPort port + def fallbacks_on_ipv6_orport(self, port): + return filter(lambda x: x.ipv6orport == port, self.fallbacks_with_ipv6()) + + # return a list of fallbacks which are on the DirPort port + def fallbacks_on_dirport(self, port): + return filter(lambda x: x.dirport == port, self.fallbacks) + + # log a message about the proportion of fallbacks on IPv4 ORPort port + # and return that count + def describe_fallback_ipv4_orport(self, port): + port_count = len(self.fallbacks_on_ipv4_orport(port)) + fallback_count = len(self.fallbacks) + logging.warning('%s of fallbacks are on IPv4 ORPort %d'%( + CandidateList.describe_percentage(port_count, + fallback_count), + port)) + return port_count + + # log a message about the proportion of IPv6 fallbacks on IPv6 ORPort port + # and return that count + def describe_fallback_ipv6_orport(self, port): + port_count = len(self.fallbacks_on_ipv6_orport(port)) + fallback_count = len(self.fallbacks_with_ipv6()) + logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%( + CandidateList.describe_percentage(port_count, + fallback_count), + port)) + return port_count + + # log a message about the proportion of fallbacks on DirPort port + # and return that count + def describe_fallback_dirport(self, port): + port_count = len(self.fallbacks_on_dirport(port)) + fallback_count = len(self.fallbacks) + logging.warning('%s of fallbacks are on DirPort %d'%( + CandidateList.describe_percentage(port_count, + fallback_count), + port)) + return port_count + + # log a message about the proportion of fallbacks on each dirport, + # each IPv4 orport, and each IPv6 orport + def describe_fallback_ports(self): + fallback_count = len(self.fallbacks) + ipv4_or_count = fallback_count + ipv4_or_count -= self.describe_fallback_ipv4_orport(443) + ipv4_or_count -= self.describe_fallback_ipv4_orport(9001) + logging.warning('%s of fallbacks are on other IPv4 ORPorts'%( + CandidateList.describe_percentage(ipv4_or_count, + fallback_count))) + ipv6_fallback_count = len(self.fallbacks_with_ipv6()) + ipv6_or_count = ipv6_fallback_count + ipv6_or_count -= self.describe_fallback_ipv6_orport(443) + ipv6_or_count -= self.describe_fallback_ipv6_orport(9001) + logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%( + CandidateList.describe_percentage(ipv6_or_count, + ipv6_fallback_count))) + dir_count = fallback_count + dir_count -= self.describe_fallback_dirport(80) + dir_count -= self.describe_fallback_dirport(9030) + logging.warning('%s of fallbacks are on other DirPorts'%( + CandidateList.describe_percentage(dir_count, + fallback_count))) + + # return a list of fallbacks which have the Exit flag + def fallbacks_with_exit(self): + return filter(lambda x: x.is_exit(), self.fallbacks) + + # log a message about the proportion of fallbacks with an Exit flag + def describe_fallback_exit_flag(self): + exit_falback_count = len(self.fallbacks_with_exit()) + fallback_count = len(self.fallbacks) + logging.warning('%s of fallbacks have the Exit flag'%( + CandidateList.describe_percentage(exit_falback_count, + fallback_count))) + + # return a list of fallbacks which have an IPv6 address + def fallbacks_with_ipv6(self): + return filter(lambda x: x.has_ipv6(), self.fallbacks) + + # log a message about the proportion of fallbacks on IPv6 + def describe_fallback_ip_family(self): + ipv6_falback_count = len(self.fallbacks_with_ipv6()) + fallback_count = len(self.fallbacks) + logging.warning('%s of fallbacks are on IPv6'%( + CandidateList.describe_percentage(ipv6_falback_count, + fallback_count))) - def summarise_fallbacks(self, eligible_count, guard_count, target_count, - max_count): + def summarise_fallbacks(self, eligible_count, operator_count, failed_count, + guard_count, target_count): # Report: # whether we checked consensus download times # the number of fallback directories (and limits/exclusions, if relevant) @@ -1399,17 +1835,23 @@ class CandidateList(dict): if FALLBACK_PROPORTION_OF_GUARDS is None: fallback_proportion = '' else: - fallback_proportion = ', Target %d (%d * %f)'%(target_count, guard_count, - FALLBACK_PROPORTION_OF_GUARDS) - s += 'Final Count: %d (Eligible %d%s'%(fallback_count, - eligible_count, + fallback_proportion = ', Target %d (%d * %.2f)'%(target_count, + guard_count, + FALLBACK_PROPORTION_OF_GUARDS) + s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count, fallback_proportion) if MAX_FALLBACK_COUNT is not None: - s += ', Clamped to %d'%(MAX_FALLBACK_COUNT) + s += ', Max %d'%(MAX_FALLBACK_COUNT) s += ')\n' if eligible_count != fallback_count: - s += 'Excluded: %d (Eligible Count Exceeded Target Count)'%( - eligible_count - fallback_count) + removed_count = eligible_count - fallback_count + excess_to_target_or_max = (eligible_count - operator_count - failed_count + - fallback_count) + # some 'Failed' failed the check, others 'Skipped' the check, + # if we already had enough successful downloads + s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' + + 'Excess %d)')%(removed_count, operator_count, failed_count, + excess_to_target_or_max) s += '\n' min_fb = self.fallback_min() min_bw = min_fb._data['measured_bandwidth'] @@ -1473,18 +1915,46 @@ def list_fallbacks(): # print json.dumps(candidates[x]._data, sort_keys=True, indent=4, # separators=(',', ': '), default=json_util.default) + # impose mandatory conditions here, like one per contact, family, IP + # in measured bandwidth order + candidates.sort_fallbacks_by_measured_bandwidth() + operator_count = 0 + # only impose these limits on the final list - operators can nominate + # multiple candidate fallbacks, and then we choose the best set + if not OUTPUT_CANDIDATES: + operator_count += candidates.limit_fallbacks_same_ip() + operator_count += candidates.limit_fallbacks_same_contact() + operator_count += candidates.limit_fallbacks_same_family() + + # check if each candidate can serve a consensus + # there's a small risk we've eliminated relays from the same operator that + # can serve a consensus, in favour of one that can't + # but given it takes up to 15 seconds to check each consensus download, + # the risk is worth it + failed_count = candidates.perform_download_consensus_checks(max_count) + + # analyse and log interesting diversity metrics + # like netblock, ports, exit, IPv4-only + # (we can't easily analyse AS, and it's hard to accurately analyse country) + candidates.describe_fallback_ip_family() + # if we can't import the ipaddress module, we can't do netblock analysis + if HAVE_IPADDRESS: + candidates.describe_fallback_netblocks() + candidates.describe_fallback_ports() + candidates.describe_fallback_exit_flag() + + # output C comments summarising the fallback selection process if len(candidates.fallbacks) > 0: - print candidates.summarise_fallbacks(eligible_count, guard_count, - target_count, max_count) + print candidates.summarise_fallbacks(eligible_count, operator_count, + failed_count, guard_count, + target_count) else: print '/* No Fallbacks met criteria */' + # output C comments specifying the OnionOO data used to create the list for s in fetch_source_list(): print describe_fetch_source(s) - # check if each candidate can serve a consensus - candidates.perform_download_consensus_checks(max_count) - # if we're outputting the final fallback list, sort by fingerprint # this makes diffs much more stable # otherwise, leave sorted by bandwidth, which allows operators to be |