1 files changed, 510 insertions, 40 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 793ec7d924..4cfee5ddb5 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -2,9 +2,12 @@
 
 # Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
 # Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
+# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
+# for netblock analysis, in PYTHONPATH, or just
+# ln -s ../py2-ipaddress-3.4.1/ipaddress.py .
 #
-# Then read the generated list to ensure no-one slipped anything funny into
-# their name or contactinfo
+# Then read the logs to make sure the fallbacks aren't dominated by a single
+# netblock or port
 
 # Script by weasel, April 2015
 # Portions by gsathya & karsten, 2013
@@ -34,6 +37,21 @@ import logging
 # INFO tells you why each relay was included or excluded
 # WARN tells you about potential misconfigurations
 logging.basicConfig(level=logging.WARNING)
+logging.root.name = ''
+# INFO tells you about each consensus download attempt
+logging.getLogger('stem').setLevel(logging.WARNING)
+
+HAVE_IPADDRESS = False
+try:
+  # python 3 builtin, or install package py2-ipaddress
+  # there are several ipaddress implementations for python 2
+  # with slightly different semantics with str typed text
+  # fortunately, all our IP addresses are in unicode
+  import ipaddress
+  HAVE_IPADDRESS = True
+except ImportError:
+  # if this happens, we avoid doing netblock analysis
+  logging.warning('Unable to import ipaddress, please install py2-ipaddress')
 
 ## Top-Level Configuration
 
@@ -468,6 +486,9 @@ class Candidate(object):
       # relays without advertised bandwdith have it calculated from their
       # consensus weight
       details['advertised_bandwidth'] = 0
+    if (not 'effective_family' in details
+        or details['effective_family'] is None):
+      details['effective_family'] = []
     details['last_changed_address_or_port'] = parse_ts(
                                       details['last_changed_address_or_port'])
     self._data = details
@@ -480,7 +501,7 @@ class Candidate(object):
     if self.orport is None:
       raise Exception("Failed to get an orport for %s."%(self._fpr,))
     self._compute_ipv6addr()
-    if self.ipv6addr is None:
+    if not self.has_ipv6():
       logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
 
   def _stable_sort_or_addresses(self):
@@ -584,14 +605,14 @@ class Candidate(object):
       (ipaddr, port) = i.rsplit(':', 1)
       if (port == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = port
+        self.ipv6orport = int(port)
         return
     # Choose the first IPv6 address in the list
     for i in self._data['or_addresses']:
       (ipaddr, port) = i.rsplit(':', 1)
       if Candidate.is_valid_ipv6_address(ipaddr):
         self.ipv6addr = ipaddr
-        self.ipv6orport = port
+        self.ipv6orport = int(port)
         return
 
   @staticmethod
@@ -804,9 +825,10 @@ class Candidate(object):
                      'ORPort (%d) does not match entry ORPort (%d)',
                      self._fpr, self.orport, int(entry['orport']))
         continue
-      has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
-      if (entry.has_key('ipv6') and has_ipv6):
-        ipv6 = self.ipv6addr + ':' + self.ipv6orport
+      ipv6 = None
+      if self.has_ipv6():
+        ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+      if entry.has_key('ipv6') and self.has_ipv6():
         # if both entry and fallback have an ipv6 address, compare them
         if entry['ipv6'] != ipv6:
           logging.info('%s is not in the whitelist: fingerprint matches, ' +
@@ -815,14 +837,14 @@ class Candidate(object):
           continue
       # if the fallback has an IPv6 address but the whitelist entry
       # doesn't, or vice versa, the whitelist entry doesn't match
-      elif entry.has_key('ipv6') and not has_ipv6:
+      elif entry.has_key('ipv6') and not self.has_ipv6():
         logging.info('%s is not in the whitelist: fingerprint matches, but ' +
                      'it has no IPv6, and entry has IPv6 (%s)', self._fpr,
                      entry['ipv6'])
         logging.warning('%s excluded: has it lost its former IPv6 address %s?',
                         self._fpr, entry['ipv6'])
         continue
-      elif not entry.has_key('ipv6') and has_ipv6:
+      elif not entry.has_key('ipv6') and self.has_ipv6():
         logging.info('%s is not in the whitelist: fingerprint matches, but ' +
                      'it has IPv6 (%s), and entry has no IPv6', self._fpr,
                      ipv6)
@@ -871,9 +893,10 @@ class Candidate(object):
                          'entry has no DirPort or ORPort', self._fpr,
                          self.dirip)
             return True
-        has_ipv6 = self.ipv6addr is not None and self.ipv6orport is not None
-        ipv6 = (self.ipv6addr + ':' + self.ipv6orport) if has_ipv6 else None
-        if (key == 'ipv6' and has_ipv6):
+        ipv6 = None
+        if self.has_ipv6():
+          ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+        if (key == 'ipv6' and self.has_ipv6()):
         # if both entry and fallback have an ipv6 address, compare them,
         # otherwise, disregard ipv6 addresses
           if value == ipv6:
@@ -889,18 +912,18 @@ class Candidate(object):
               logging.info('%s is in the blacklist: IPv6 (%s) matches, and' +
                            'entry has no DirPort', self._fpr, ipv6)
               return True
-        elif (key == 'ipv6' or has_ipv6):
+        elif (key == 'ipv6' or self.has_ipv6()):
           # only log if the fingerprint matches but the IPv6 doesn't
           if entry.has_key('id') and entry['id'] == self._fpr:
             logging.info('%s skipping IPv6 blacklist comparison: relay ' +
                          'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
-                         '' if has_ipv6 else ' no',
-                         (' (' + ipv6 + ')') if has_ipv6 else  '',
+                         '' if self.has_ipv6() else ' no',
+                         (' (' + ipv6 + ')') if self.has_ipv6() else  '',
                          '' if key == 'ipv6' else ' no',
                          (' (' + value + ')') if key == 'ipv6' else '')
             logging.warning('Has %s %s IPv6 address %s?', self._fpr,
-                            'gained an' if has_ipv6 else 'lost its former',
-                            ipv6 if has_ipv6 else value)
+                        'gained an' if self.has_ipv6() else 'lost its former',
+                        ipv6 if self.has_ipv6() else value)
     return False
 
   def cw_to_bw_factor(self):
@@ -936,6 +959,101 @@ class Candidate(object):
   def is_running(self):
     return 'Running' in self._data['flags']
 
+  # does this fallback have an IPv6 address and orport?
+  def has_ipv6(self):
+    return self.ipv6addr is not None and self.ipv6orport is not None
+
+  # strip leading and trailing brackets from an IPv6 address
+  # safe to use on non-bracketed IPv6 and on IPv4 addresses
+  # also convert to unicode, and make None appear as ''
+  @staticmethod
+  def strip_ipv6_brackets(ip):
+    if ip is None:
+      return unicode('')
+    if len(ip) < 2:
+      return unicode(ip)
+    if ip[0] == '[' and ip[-1] == ']':
+      return unicode(ip[1:-1])
+    return unicode(ip)
+
+  # are ip_a and ip_b in the same netblock?
+  # mask_bits is the size of the netblock
+  # takes both IPv4 and IPv6 addresses
+  # the versions of ip_a and ip_b must be the same
+  # the mask must be valid for the IP version
+  @staticmethod
+  def netblocks_equal(ip_a, ip_b, mask_bits):
+    if ip_a is None or ip_b is None:
+      return False
+    ip_a = Candidate.strip_ipv6_brackets(ip_a)
+    ip_b = Candidate.strip_ipv6_brackets(ip_b)
+    a = ipaddress.ip_address(ip_a)
+    b = ipaddress.ip_address(ip_b)
+    if a.version != b.version:
+      raise Exception('Mismatching IP versions in %s and %s'%(ip_a, ip_b))
+    if mask_bits > a.max_prefixlen:
+      logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
+      mask_bits = a.max_prefixlen
+    if mask_bits < 0:
+      logging.warning('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
+      mask_bits = 0
+    a_net = ipaddress.ip_network('%s/%d'%(ip_a, mask_bits), strict=False)
+    return b in a_net
+
+  # is this fallback's IPv4 address (dirip) in the same netblock as other's
+  # IPv4 address?
+  # mask_bits is the size of the netblock
+  def ipv4_netblocks_equal(self, other, mask_bits):
+    return Candidate.netblocks_equal(self.dirip, other.dirip, mask_bits)
+
+  # is this fallback's IPv6 address (ipv6addr) in the same netblock as
+  # other's IPv6 address?
+  # Returns False if either fallback has no IPv6 address
+  # mask_bits is the size of the netblock
+  def ipv6_netblocks_equal(self, other, mask_bits):
+    if not self.has_ipv6() or not other.has_ipv6():
+      return False
+    return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits)
+
+  # is this fallback's IPv4 DirPort the same as other's IPv4 DirPort?
+  def dirport_equal(self, other):
+    return self.dirport == other.dirport
+
+  # is this fallback's IPv4 ORPort the same as other's IPv4 ORPort?
+  def ipv4_orport_equal(self, other):
+    return self.orport == other.orport
+
+  # is this fallback's IPv6 ORPort the same as other's IPv6 ORPort?
+  # Returns False if either fallback has no IPv6 address
+  def ipv6_orport_equal(self, other):
+    if not self.has_ipv6() or not other.has_ipv6():
+      return False
+    return self.ipv6orport == other.ipv6orport
+
+  # does this fallback have the same DirPort, IPv4 ORPort, or
+  # IPv6 ORPort as other?
+  # Ignores IPv6 ORPort if either fallback has no IPv6 address
+  def port_equal(self, other):
+    return (self.dirport_equal(other) or self.ipv4_orport_equal(other)
+            or self.ipv6_orport_equal(other))
+
+  # return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present)
+  def port_list(self):
+    ports = [self.dirport, self.orport]
+    if self.has_ipv6() and not self.ipv6orport in ports:
+      ports.append(self.ipv6orport)
+    return ports
+
+  # does this fallback share a port with other, regardless of whether the
+  # port types match?
+  # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
+  # return True
+  def port_shared(self, other):
+    for p in self.port_list():
+      if p in other.port_list():
+        return True
+    return False
+
   # report how long it takes to download a consensus from dirip:dirport
   @staticmethod
   def fallback_consensus_download_speed(dirip, dirport, nickname, max_time):
@@ -984,7 +1102,7 @@ class Candidate(object):
                                                 self.dirport,
                                                 self._data['nickname'],
                                                 CONSENSUS_DOWNLOAD_SPEED_MAX)
-    if self.ipv6addr is not None and PERFORM_IPV6_DIRPORT_CHECKS:
+    if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
       # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
       ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr,
                                                 self.dirport,
@@ -1086,9 +1204,8 @@ class Candidate(object):
             self.orport,
             cleanse_c_string(self._fpr))
     s += '\n'
-    if self.ipv6addr is not None:
-      s += '" ipv6=%s:%s"'%(
-            cleanse_c_string(self.ipv6addr), cleanse_c_string(self.ipv6orport))
+    if self.has_ipv6():
+      s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport)
       s += '\n'
     s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT)
     if comment_string:
@@ -1126,7 +1243,7 @@ class CandidateList(dict):
     d = fetch('details',
         fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
                 'consensus_weight,advertised_bandwidth,or_addresses,' +
-                'dir_address,recommended_version,flags'))
+                'dir_address,recommended_version,flags,effective_family'))
     logging.debug('Loading details document done.')
 
     if not 'relays' in d: raise Exception("No relays found in document.")
@@ -1163,19 +1280,19 @@ class CandidateList(dict):
   # lowest to highest
   # used to find the median cw_to_bw_factor()
   def sort_fallbacks_by_cw_to_bw_factor(self):
-    self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor(), self.fallbacks)
+    self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
 
   # sort fallbacks by their measured bandwidth, highest to lowest
   # calculate_measured_bandwidth before calling this
   # this is useful for reviewing candidates in priority order
   def sort_fallbacks_by_measured_bandwidth(self):
     self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
-                        self.fallbacks, reverse=True)
+                        reverse=True)
 
   # sort fallbacks by their fingerprint, lowest to highest
   # this is useful for stable diffs of fallback lists
   def sort_fallbacks_by_fingerprint(self):
-    self.fallbacks.sort(key=lambda f: self[f]._fpr, self.fallbacks)
+    self.fallbacks.sort(key=lambda f: f._fpr)
 
   @staticmethod
   def load_relaylist(file_name):
@@ -1341,6 +1458,91 @@ class CandidateList(dict):
     else:
       return None
 
+  # does exclusion_list contain attribute?
+  # if so, return False
+  # if not, return True
+  # if attribute is None or the empty string, always return True
+  @staticmethod
+  def allow(attribute, exclusion_list):
+    if attribute is None or attribute == '':
+      return True
+    elif attribute in exclusion_list:
+      return False
+    else:
+      return True
+
+  # make sure there is only one fallback per IPv4 address, and per IPv6 address
+  # there is only one IPv4 address on each fallback: the IPv4 DirPort address
+  # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
+  # there is at most one IPv6 address on each fallback: the IPv6 ORPort address
+  # we try to match the IPv4 ORPort, but will use any IPv6 address if needed
+  # (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but
+  # typically only use the IPv6 ORPort)
+  # if there is no IPv6 address, only the IPv4 address is checked
+  # return the number of candidates we excluded
+  def limit_fallbacks_same_ip(self):
+    ip_limit_fallbacks = []
+    ip_list = []
+    for f in self.fallbacks:
+      if (CandidateList.allow(f.dirip, ip_list)
+          and CandidateList.allow(f.ipv6addr, ip_list)):
+        ip_limit_fallbacks.append(f)
+        ip_list.append(f.dirip)
+        if f.has_ipv6():
+          ip_list.append(f.ipv6addr)
+      elif not CandidateList.allow(f.dirip, ip_list):
+        logging.debug('Eliminated %s: already have fallback on IPv4 %s'%(
+                                                          f._fpr, f.dirip))
+      elif f.has_ipv6() and not CandidateList.allow(f.ipv6addr, ip_list):
+        logging.debug('Eliminated %s: already have fallback on IPv6 %s'%(
+                                                          f._fpr, f.ipv6addr))
+    original_count = len(self.fallbacks)
+    self.fallbacks = ip_limit_fallbacks
+    return original_count - len(self.fallbacks)
+
+  # make sure there is only one fallback per ContactInfo
+  # if there is no ContactInfo, allow the fallback
+  # this check can be gamed by providing no ContactInfo, or by setting the
+  # ContactInfo to match another fallback
+  # However, given the likelihood that relays with the same ContactInfo will
+  # go down at similar times, its usefulness outweighs the risk
+  def limit_fallbacks_same_contact(self):
+    contact_limit_fallbacks = []
+    contact_list = []
+    for f in self.fallbacks:
+      if CandidateList.allow(f._data['contact'], contact_list):
+        contact_limit_fallbacks.append(f)
+        contact_list.append(f._data['contact'])
+      else:
+        logging.debug(('Eliminated %s: already have fallback on ' +
+                       'ContactInfo %s')%(f._fpr, f._data['contact']))
+    original_count = len(self.fallbacks)
+    self.fallbacks = contact_limit_fallbacks
+    return original_count - len(self.fallbacks)
+
+  # make sure there is only one fallback per effective family
+  # if there is no family, allow the fallback
+  # this check can't be gamed, because we use effective family, which ensures
+  # mutual family declarations
+  # if any indirect families exist, the result depends on the order in which
+  # fallbacks are sorted in the list
+  def limit_fallbacks_same_family(self):
+    family_limit_fallbacks = []
+    fingerprint_list = []
+    for f in self.fallbacks:
+      if CandidateList.allow(f._fpr, fingerprint_list):
+        family_limit_fallbacks.append(f)
+        fingerprint_list.append(f._fpr)
+        fingerprint_list.extend(f._data['effective_family'])
+      else:
+        # technically, we already have a fallback with this fallback in its
+        # effective family
+        logging.debug('Eliminated %s: already have fallback in effective ' +
+                      'family'%(f._fpr))
+    original_count = len(self.fallbacks)
+    self.fallbacks = family_limit_fallbacks
+    return original_count - len(self.fallbacks)
+
   # try a download check on each fallback candidate in order
   # stop after max_count successful downloads
   # but don't remove any candidates from the array
@@ -1361,6 +1563,7 @@ class CandidateList(dict):
   # - eliminate failed candidates
   # - if there are more than max_count candidates, eliminate lowest bandwidth
   # - if there are fewer than max_count candidates, leave only successful
+  # Return the number of fallbacks that failed the consensus check
   def perform_download_consensus_checks(self, max_count):
     self.sort_fallbacks_by_measured_bandwidth()
     self.try_download_consensus_checks(max_count)
@@ -1370,12 +1573,245 @@ class CandidateList(dict):
       self.try_download_consensus_checks(max_count)
     # now we have at least max_count successful candidates,
     # or we've tried them all
+    original_count = len(self.fallbacks)
     self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
                             self.fallbacks)
+    # some of these failed the check, others skipped the check,
+    # if we already had enough successful downloads
+    failed_count = original_count - len(self.fallbacks)
     self.fallbacks = self.fallbacks[:max_count]
+    return failed_count
+
+  # return a string that describes a/b as a percentage
+  @staticmethod
+  def describe_percentage(a, b):
+    return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b)
+
+  # return a dictionary of lists of fallbacks by IPv4 netblock
+  # the dictionary is keyed by the fingerprint of an arbitrary fallback
+  # in each netblock
+  # mask_bits is the size of the netblock
+  def fallbacks_by_ipv4_netblock(self, mask_bits):
+    netblocks = {}
+    for f in self.fallbacks:
+      found_netblock = False
+      for b in netblocks.keys():
+        # we found an existing netblock containing this fallback
+        if f.ipv4_netblocks_equal(self[b], mask_bits):
+          # add it to the list
+          netblocks[b].append(f)
+          found_netblock = True
+          break
+      # make a new netblock based on this fallback's fingerprint
+      if not found_netblock:
+        netblocks[f._fpr] = [f]
+    return netblocks
+
+  # return a dictionary of lists of fallbacks by IPv6 netblock
+  # where mask_bits is the size of the netblock
+  def fallbacks_by_ipv6_netblock(self, mask_bits):
+    netblocks = {}
+    for f in self.fallbacks:
+      # skip fallbacks without IPv6 addresses
+      if not f.has_ipv6():
+        continue
+      found_netblock = False
+      for b in netblocks.keys():
+        # we found an existing netblock containing this fallback
+        if f.ipv6_netblocks_equal(self[b], mask_bits):
+          # add it to the list
+          netblocks[b].append(f)
+          found_netblock = True
+          break
+      # make a new netblock based on this fallback's fingerprint
+      if not found_netblock:
+        netblocks[f._fpr] = [f]
+    return netblocks
+
+  # log a message about the proportion of fallbacks in each IPv4 netblock,
+  # where mask_bits is the size of the netblock
+  def describe_fallback_ipv4_netblock_mask(self, mask_bits):
+    fallback_count = len(self.fallbacks)
+    shared_netblock_fallback_count = 0
+    most_frequent_netblock = None
+    netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
+    for b in netblocks.keys():
+      if len(netblocks[b]) > 1:
+        # how many fallbacks are in a netblock with other fallbacks?
+        shared_netblock_fallback_count += len(netblocks[b])
+        # what's the netblock with the most fallbacks?
+        if (most_frequent_netblock is None
+            or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+          most_frequent_netblock = b
+        logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
+        for f in netblocks[b]:
+          logging.debug('%s - %s', f.dirip, f._fpr)
+    if most_frequent_netblock is not None:
+      logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
+                                    CandidateList.describe_percentage(
+                                      len(netblocks[most_frequent_netblock]),
+                                      fallback_count),
+                                    mask_bits,
+                                    self[most_frequent_netblock].dirip))
+    if shared_netblock_fallback_count > 0:
+      logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
+                       'fallbacks')%(CandidateList.describe_percentage(
+                                                shared_netblock_fallback_count,
+                                                fallback_count),
+                                     mask_bits))
+
+  # log a message about the proportion of fallbacks in each IPv6 netblock,
+  # where mask_bits is the size of the netblock
+  def describe_fallback_ipv6_netblock_mask(self, mask_bits):
+    fallback_count = len(self.fallbacks_with_ipv6())
+    shared_netblock_fallback_count = 0
+    most_frequent_netblock = None
+    netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
+    for b in netblocks.keys():
+      if len(netblocks[b]) > 1:
+        # how many fallbacks are in a netblock with other fallbacks?
+        shared_netblock_fallback_count += len(netblocks[b])
+        # what's the netblock with the most fallbacks?
+        if (most_frequent_netblock is None
+            or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+          most_frequent_netblock = b
+        logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
+        for f in netblocks[b]:
+          logging.debug('%s - %s', f.ipv6addr, f._fpr)
+    if most_frequent_netblock is not None:
+      logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
+                                    CandidateList.describe_percentage(
+                                      len(netblocks[most_frequent_netblock]),
+                                      fallback_count),
+                                    mask_bits,
+                                    self[most_frequent_netblock].ipv6addr))
+    if shared_netblock_fallback_count > 0:
+      logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
+                       'fallbacks')%(CandidateList.describe_percentage(
+                                                shared_netblock_fallback_count,
+                                                fallback_count),
+                                     mask_bits))
+
+  # log a message about the proportion of fallbacks in each IPv4 /8, /16,
+  # and /24
+  def describe_fallback_ipv4_netblocks(self):
+   # this doesn't actually tell us anything useful
+   #self.describe_fallback_ipv4_netblock_mask(8)
+   self.describe_fallback_ipv4_netblock_mask(16)
+   self.describe_fallback_ipv4_netblock_mask(24)
+
+  # log a message about the proportion of fallbacks in each IPv6 /12 (RIR),
+  # /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host)
+  # https://www.iana.org/assignments/ipv6-unicast-address-assignments/
+  def describe_fallback_ipv6_netblocks(self):
+    # these don't actually tell us anything useful
+    #self.describe_fallback_ipv6_netblock_mask(12)
+    #self.describe_fallback_ipv6_netblock_mask(23)
+    self.describe_fallback_ipv6_netblock_mask(32)
+    self.describe_fallback_ipv6_netblock_mask(48)
+    self.describe_fallback_ipv6_netblock_mask(64)
+
+  # log a message about the proportion of fallbacks in each IPv4 and IPv6
+  # netblock
+  def describe_fallback_netblocks(self):
+    self.describe_fallback_ipv4_netblocks()
+    self.describe_fallback_ipv6_netblocks()
+
+  # return a list of fallbacks which are on the IPv4 ORPort port
+  def fallbacks_on_ipv4_orport(self, port):
+    return filter(lambda x: x.orport == port, self.fallbacks)
+
+  # return a list of fallbacks which are on the IPv6 ORPort port
+  def fallbacks_on_ipv6_orport(self, port):
+    return filter(lambda x: x.ipv6orport == port, self.fallbacks_with_ipv6())
+
+  # return a list of fallbacks which are on the DirPort port
+  def fallbacks_on_dirport(self, port):
+    return filter(lambda x: x.dirport == port, self.fallbacks)
+
+  # log a message about the proportion of fallbacks on IPv4 ORPort port
+  # and return that count
+  def describe_fallback_ipv4_orport(self, port):
+    port_count = len(self.fallbacks_on_ipv4_orport(port))
+    fallback_count = len(self.fallbacks)
+    logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(
+                    CandidateList.describe_percentage(port_count,
+                                                      fallback_count),
+                    port))
+    return port_count
+
+  # log a message about the proportion of IPv6 fallbacks on IPv6 ORPort port
+  # and return that count
+  def describe_fallback_ipv6_orport(self, port):
+    port_count = len(self.fallbacks_on_ipv6_orport(port))
+    fallback_count = len(self.fallbacks_with_ipv6())
+    logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(
+                    CandidateList.describe_percentage(port_count,
+                                                      fallback_count),
+                    port))
+    return port_count
+
+  # log a message about the proportion of fallbacks on DirPort port
+  # and return that count
+  def describe_fallback_dirport(self, port):
+    port_count = len(self.fallbacks_on_dirport(port))
+    fallback_count = len(self.fallbacks)
+    logging.warning('%s of fallbacks are on DirPort %d'%(
+                    CandidateList.describe_percentage(port_count,
+                                                      fallback_count),
+                    port))
+    return port_count
+
+  # log a message about the proportion of fallbacks on each dirport,
+  # each IPv4 orport, and each IPv6 orport
+  def describe_fallback_ports(self):
+    fallback_count = len(self.fallbacks)
+    ipv4_or_count = fallback_count
+    ipv4_or_count -= self.describe_fallback_ipv4_orport(443)
+    ipv4_or_count -= self.describe_fallback_ipv4_orport(9001)
+    logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
+                    CandidateList.describe_percentage(ipv4_or_count,
+                                                      fallback_count)))
+    ipv6_fallback_count = len(self.fallbacks_with_ipv6())
+    ipv6_or_count = ipv6_fallback_count
+    ipv6_or_count -= self.describe_fallback_ipv6_orport(443)
+    ipv6_or_count -= self.describe_fallback_ipv6_orport(9001)
+    logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
+                    CandidateList.describe_percentage(ipv6_or_count,
+                                                      ipv6_fallback_count)))
+    dir_count = fallback_count
+    dir_count -= self.describe_fallback_dirport(80)
+    dir_count -= self.describe_fallback_dirport(9030)
+    logging.warning('%s of fallbacks are on other DirPorts'%(
+                    CandidateList.describe_percentage(dir_count,
+                                                      fallback_count)))
+
+  # return a list of fallbacks which have the Exit flag
+  def fallbacks_with_exit(self):
+    return filter(lambda x: x.is_exit(), self.fallbacks)
+
+  # log a message about the proportion of fallbacks with an Exit flag
+  def describe_fallback_exit_flag(self):
+    exit_falback_count = len(self.fallbacks_with_exit())
+    fallback_count = len(self.fallbacks)
+    logging.warning('%s of fallbacks have the Exit flag'%(
+                    CandidateList.describe_percentage(exit_falback_count,
+                                                      fallback_count)))
+
+  # return a list of fallbacks which have an IPv6 address
+  def fallbacks_with_ipv6(self):
+    return filter(lambda x: x.has_ipv6(), self.fallbacks)
+
+  # log a message about the proportion of fallbacks on IPv6
+  def describe_fallback_ip_family(self):
+    ipv6_falback_count = len(self.fallbacks_with_ipv6())
+    fallback_count = len(self.fallbacks)
+    logging.warning('%s of fallbacks are on IPv6'%(
+                    CandidateList.describe_percentage(ipv6_falback_count,
+                                                      fallback_count)))
 
-  def summarise_fallbacks(self, eligible_count, guard_count, target_count,
-                          max_count):
+  def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
+                          guard_count, target_count):
     # Report:
     #  whether we checked consensus download times
     #  the number of fallback directories (and limits/exclusions, if relevant)
@@ -1399,17 +1835,23 @@ class CandidateList(dict):
     if FALLBACK_PROPORTION_OF_GUARDS is None:
       fallback_proportion = ''
     else:
-      fallback_proportion = ', Target %d (%d * %f)'%(target_count, guard_count,
-                                                 FALLBACK_PROPORTION_OF_GUARDS)
-    s += 'Final Count: %d (Eligible %d%s'%(fallback_count,
-                                           eligible_count,
+      fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
+                                                guard_count,
+                                                FALLBACK_PROPORTION_OF_GUARDS)
+    s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
                                            fallback_proportion)
     if MAX_FALLBACK_COUNT is not None:
-      s += ', Clamped to %d'%(MAX_FALLBACK_COUNT)
+      s += ', Max %d'%(MAX_FALLBACK_COUNT)
     s += ')\n'
     if eligible_count != fallback_count:
-      s += 'Excluded:     %d (Eligible Count Exceeded Target Count)'%(
-                                              eligible_count - fallback_count)
+      removed_count = eligible_count - fallback_count
+      excess_to_target_or_max = (eligible_count - operator_count - failed_count
+                                 - fallback_count)
+      # some 'Failed' failed the check, others 'Skipped' the check,
+      # if we already had enough successful downloads
+      s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
+            'Excess %d)')%(removed_count, operator_count, failed_count,
+                           excess_to_target_or_max)
       s += '\n'
     min_fb = self.fallback_min()
     min_bw = min_fb._data['measured_bandwidth']
@@ -1473,18 +1915,46 @@ def list_fallbacks():
   #  print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
   #                   separators=(',', ': '), default=json_util.default)
 
+  # impose mandatory conditions here, like one per contact, family, IP
+  # in measured bandwidth order
+  candidates.sort_fallbacks_by_measured_bandwidth()
+  operator_count = 0
+  # only impose these limits on the final list - operators can nominate
+  # multiple candidate fallbacks, and then we choose the best set
+  if not OUTPUT_CANDIDATES:
+    operator_count += candidates.limit_fallbacks_same_ip()
+    operator_count += candidates.limit_fallbacks_same_contact()
+    operator_count += candidates.limit_fallbacks_same_family()
+
+  # check if each candidate can serve a consensus
+  # there's a small risk we've eliminated relays from the same operator that
+  # can serve a consensus, in favour of one that can't
+  # but given it takes up to 15 seconds to check each consensus download,
+  # the risk is worth it
+  failed_count = candidates.perform_download_consensus_checks(max_count)
+
+  # analyse and log interesting diversity metrics
+  # like netblock, ports, exit, IPv4-only
+  # (we can't easily analyse AS, and it's hard to accurately analyse country)
+  candidates.describe_fallback_ip_family()
+  # if we can't import the ipaddress module, we can't do netblock analysis
+  if HAVE_IPADDRESS:
+    candidates.describe_fallback_netblocks()
+  candidates.describe_fallback_ports()
+  candidates.describe_fallback_exit_flag()
+
+  # output C comments summarising the fallback selection process
   if len(candidates.fallbacks) > 0:
-    print candidates.summarise_fallbacks(eligible_count, guard_count,
-                                         target_count, max_count)
+    print candidates.summarise_fallbacks(eligible_count, operator_count,
+                                         failed_count, guard_count,
+                                         target_count)
   else:
     print '/* No Fallbacks met criteria */'
 
+  # output C comments specifying the OnionOO data used to create the list
   for s in fetch_source_list():
     print describe_fetch_source(s)
 
-  # check if each candidate can serve a consensus
-  candidates.perform_download_consensus_checks(max_count)
-
   # if we're outputting the final fallback list, sort by fingerprint
   # this makes diffs much more stable
   # otherwise, leave sorted by bandwidth, which allows operators to be