Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-x  scripts/maint/updateFallbackDirs.py  1999
1 file changed, 1999 insertions(+), 0 deletions(-)
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
new file mode 100755
index 0000000000..110ecda64c
--- /dev/null
+++ b/scripts/maint/updateFallbackDirs.py
@@ -0,0 +1,1999 @@
+#!/usr/bin/python
+
+# Usage: scripts/maint/updateFallbackDirs.py > src/or/fallback_dirs.inc
+#
+# This script should be run from a stable, reliable network connection,
+# with no other network activity (and not over tor).
+# If this is not possible, please disable:
+# PERFORM_IPV4_DIRPORT_CHECKS and PERFORM_IPV6_DIRPORT_CHECKS
+#
+# Needs dateutil (and potentially other python packages)
+# Needs stem available in your PYTHONPATH, or just ln -s ../stem/stem .
+# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
+# for netblock analysis, in PYTHONPATH, or just
+# ln -s ../py2-ipaddress-3.4.1/ipaddress.py .
+#
+# Then read the logs to make sure the fallbacks aren't dominated by a single
+# netblock or port
+
+# Script by weasel, April 2015
+# Portions by gsathya & karsten, 2013
+# https://trac.torproject.org/projects/tor/attachment/ticket/8374/dir_list.2.py
+# Modifications by teor, 2015
+
+import StringIO
+import string
+import re
+import datetime
+import gzip
+import os.path
+import json
+import math
+import sys
+import urllib
+import urllib2
+import hashlib
+import dateutil.parser
+# bson_lazy provides bson
+#from bson import json_util
+import copy
+
+from stem.descriptor.remote import DescriptorDownloader
+
+import logging
+# INFO tells you why each relay was included or excluded
+# WARN tells you about potential misconfigurations and relay detail changes
+logging.basicConfig(level=logging.WARNING)
+logging.root.name = ''
+# INFO tells you about each consensus download attempt
+logging.getLogger('stem').setLevel(logging.WARNING)
+
+HAVE_IPADDRESS = False
+try:
+ # python 3 builtin, or install package py2-ipaddress
+ # there are several ipaddress implementations for python 2
+ # with slightly different semantics with str typed text
+ # fortunately, all our IP addresses are in unicode
+ import ipaddress
+ HAVE_IPADDRESS = True
+except ImportError:
+ # if this happens, we avoid doing netblock analysis
+ logging.warning('Unable to import ipaddress, please install py2-ipaddress.' +
+ ' A fallback list will be created, but optional netblock' +
+ ' analysis will not be performed.')
+
+## Top-Level Configuration
+
+# Output all candidate fallbacks, or only output selected fallbacks?
+OUTPUT_CANDIDATES = False
+
+# Perform DirPort checks over IPv4?
+# Change this to False if IPv4 doesn't work for you, or if you don't want to
+# download a consensus for each fallback
+# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
+PERFORM_IPV4_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else True
+
+# Perform DirPort checks over IPv6?
+# If you know IPv6 works for you, set this to True
+# This will exclude IPv6 relays without an IPv6 DirPort configured
+# So it's best left at False until #18394 is implemented
+# Don't check ~1000 candidates when OUTPUT_CANDIDATES is True
+PERFORM_IPV6_DIRPORT_CHECKS = False if OUTPUT_CANDIDATES else False
+
+# Output fallback name, flags, and ContactInfo in a C comment?
+OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
+
+# Output the number of fallbacks with matching ContactInfo in the fallback
+# list or in the blacklist?
+# Useful if you're trying to contact operators
+CONTACT_COUNT = True if OUTPUT_CANDIDATES else False
+CONTACT_BLACKLIST_COUNT = True if OUTPUT_CANDIDATES else False
+
+## OnionOO Settings
+
+ONIONOO = 'https://onionoo.torproject.org/'
+#ONIONOO = 'https://onionoo.thecthulhu.com/'
+
+# Don't bother going out to the Internet, just use the files available locally,
+# even if they're very old
+LOCAL_FILES_ONLY = False
+
+## Whitelist / Blacklist Filter Settings
+
+# The whitelist contains entries that are included if all attributes match
+# (IPv4, dirport, orport, id, and optionally IPv6 and IPv6 orport)
+# The blacklist contains (partial) entries that are excluded if any
+# sufficiently specific group of attributes matches:
+# IPv4 & DirPort
+# IPv4 & ORPort
+# ID
+# IPv6 & DirPort
+# IPv6 & IPv6 ORPort
+# If neither port is included in the blacklist, the entire IP address is
+# blacklisted.
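+#
+# For example (hypothetical entries, using documentation addresses):
+#   whitelist line: 192.0.2.1:80 orport=443 id=<40-hex fingerprint>
+#   blacklist line: 192.0.2.2:9030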
+
+# What happens to entries in neither list?
+# When True, they are included, when False, they are excluded
+INCLUDE_UNLISTED_ENTRIES = True if OUTPUT_CANDIDATES else False
+
+# If an entry is in both lists, what happens?
+# When True, it is excluded, when False, it is included
+BLACKLIST_EXCLUDES_WHITELIST_ENTRIES = True
+
+WHITELIST_FILE_NAME = 'scripts/maint/fallback.whitelist'
+BLACKLIST_FILE_NAME = 'scripts/maint/fallback.blacklist'
+
+# The number of bytes we'll read from a filter file before giving up
+MAX_LIST_FILE_SIZE = 1024 * 1024
+
+## Eligibility Settings
+
+# Reduced due to a bug in tor where a relay submits a 0 DirPort when restarted
+# This causes OnionOO to (correctly) reset its stability timer
+# This issue will be fixed in 0.2.7.7 and 0.2.8.2
+# Until then, the CUTOFFs below ensure a decent level of stability.
+ADDRESS_AND_PORT_STABLE_DAYS = 7
+# What time-weighted fraction of these flags must fallback directories
+# equal or exceed?
+CUTOFF_RUNNING = .95
+CUTOFF_V2DIR = .95
+CUTOFF_GUARD = .95
+# What time-weighted fraction of these flags must fallback directories
+# equal or fall under?
+# .00 means no bad exits
+PERMITTED_BADEXIT = .00
+
+# older entries' weights are adjusted with ALPHA^(age in days)
+AGE_ALPHA = 0.99
+
+# this factor is used to scale OnionOO entries to [0,1]
+ONIONOO_SCALE_ONE = 999.
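+# (e.g. a raw Onionoo flag value of 999 scales to 999 / 999. = 1.0)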
+
+## Fallback Count Limits
+
+# The target for these parameters is 20% of the guards in the network
+# This is around 200 as of October 2015
+_FB_POG = 0.2
+FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG
+
+# We want exactly 100 fallbacks for the initial release
+# This gives us scope to add extra fallbacks to the list as needed
+# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
+MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 100
+# Emit a C #error if the number of fallbacks is below this number
+MIN_FALLBACK_COUNT = 100
+
+## Fallback Bandwidth Requirements
+
+# Any fallback with the Exit flag has its bandwidth multiplied by this fraction
+# to make sure we aren't further overloading exits
+# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
+# and the extra load really isn't that much for large relays.)
+EXIT_BANDWIDTH_FRACTION = 1.0
+
+# If a single fallback's bandwidth is too low, it's pointless adding it
+# We expect fallbacks to handle an extra 30 kilobytes per second of traffic
+# Make sure they can support a hundred times the expected extra load
+# (Use 102.4 to make it come out nicely in MB/s)
+# We convert this to a consensus weight before applying the filter,
+# because all the bandwidth amounts are specified by the relay
+MIN_BANDWIDTH = 102.4 * 30.0 * 1024.0
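+# (102.4 * 30.0 * 1024.0 bytes/s = 3 145 728 bytes/s, or exactly 3.0 MB/s)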
+
+# Clients will time out after 30 seconds trying to download a consensus
+# So allow fallback directories half that to deliver a consensus
+# The exact download times might change based on the network connection
+# running this script, but only by a few seconds
+# There is also about a second of python overhead
+CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
+# If the relay fails a consensus check, retry the download
+# This avoids delisting a relay due to transient network conditions
+CONSENSUS_DOWNLOAD_RETRY = True
+
+## Fallback Weights for Client Selection
+
+# All fallback weights are equal, and set to the value below
+# Authorities are weighted 1.0 by default
+# Clients use these weights to select fallbacks and authorities at random
+# If there are 100 fallbacks and 9 authorities:
+# - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99%
+# - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.1%
+# A client choosing a bootstrap directory server will choose a fallback for
+# 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for
+# 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts.
+# (This disregards the bootstrap schedules, where clients start by choosing
+# from fallbacks & authorities, then later choose from only authorities.)
+FALLBACK_OUTPUT_WEIGHT = 10.0
+
+## Parsing Functions
+
+def parse_ts(t):
+ return datetime.datetime.strptime(t, "%Y-%m-%d %H:%M:%S")
+
+def remove_bad_chars(raw_string, bad_char_list):
+ # Remove each character in the bad_char_list
+ cleansed_string = raw_string
+ for c in bad_char_list:
+ cleansed_string = cleansed_string.replace(c, '')
+ return cleansed_string
+
+def cleanse_unprintable(raw_string):
+ # Remove all unprintable characters
+ cleansed_string = ''
+ for c in raw_string:
+ if c in string.printable:
+ cleansed_string += c
+ return cleansed_string
+
+def cleanse_whitespace(raw_string):
+ # Replace all whitespace characters with a space
+ cleansed_string = raw_string
+ for c in string.whitespace:
+ cleansed_string = cleansed_string.replace(c, ' ')
+ return cleansed_string
+
+def cleanse_c_multiline_comment(raw_string):
+ cleansed_string = raw_string
+ # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
+ cleansed_string = cleanse_whitespace(cleansed_string)
+ # ContactInfo and Version can be arbitrary binary data
+ cleansed_string = cleanse_unprintable(cleansed_string)
+ # Prevent a malicious / unanticipated string from breaking out
+ # of a C-style multiline comment
+ # This removes '/*' and '*/' and '//'
+ bad_char_list = '*/'
+ # Prevent a malicious string from using C nulls
+ bad_char_list += '\0'
+ # Be safer by removing bad characters entirely
+ cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
+ # Some compilers may further process the content of comments
+ # There isn't much we can do to cover every possible case
+ # But comment-based directives are typically only advisory
+ return cleansed_string
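+
+# For example (illustrative):
+#   cleanse_c_multiline_comment('evil */ contact') returns 'evil  contact',
+#   because '*', '/', and NUL bytes are removed entirely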
+
+def cleanse_c_string(raw_string):
+ cleansed_string = raw_string
+ # Embedded newlines should be removed by tor/onionoo, but let's be paranoid
+ cleansed_string = cleanse_whitespace(cleansed_string)
+ # ContactInfo and Version can be arbitrary binary data
+ cleansed_string = cleanse_unprintable(cleansed_string)
+ # Prevent a malicious address/fingerprint string from breaking out
+ # of a C-style string
+ bad_char_list = '"'
+ # Prevent a malicious string from using escapes
+ bad_char_list += '\\'
+ # Prevent a malicious string from using C nulls
+ bad_char_list += '\0'
+ # Be safer by removing bad characters entirely
+ cleansed_string = remove_bad_chars(cleansed_string, bad_char_list)
+ # Some compilers may further process the content of strings
+ # There isn't much we can do to cover every possible case
+ # But this typically only results in changes to the string data
+ return cleansed_string
+
+## OnionOO Source Functions
+
+# a dictionary of source metadata for each onionoo query we've made
+fetch_source = {}
+
+# register source metadata for 'what'
+# assumes we only retrieve one document for each 'what'
+def register_fetch_source(what, url, relays_published, version):
+ fetch_source[what] = {}
+ fetch_source[what]['url'] = url
+ fetch_source[what]['relays_published'] = relays_published
+ fetch_source[what]['version'] = version
+
+# list each registered source's 'what'
+def fetch_source_list():
+ return sorted(fetch_source.keys())
+
+# given 'what', provide a multiline C comment describing the source
+def describe_fetch_source(what):
+ desc = '/*'
+ desc += '\n'
+ desc += 'Onionoo Source: '
+ desc += cleanse_c_multiline_comment(what)
+ desc += ' Date: '
+ desc += cleanse_c_multiline_comment(fetch_source[what]['relays_published'])
+ desc += ' Version: '
+ desc += cleanse_c_multiline_comment(fetch_source[what]['version'])
+ desc += '\n'
+ desc += 'URL: '
+ desc += cleanse_c_multiline_comment(fetch_source[what]['url'])
+ desc += '\n'
+ desc += '*/'
+ return desc
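+
+# For example, describe_fetch_source('details') might return (illustrative):
+# /*
+# Onionoo Source: details Date: 2015-10-02 13:00:00 Version: 2.0
+# URL: https://onionoo.torproject.org/details?type=relay...
+# */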
+
+## File Processing Functions
+
+def write_to_file(out_str, file_name, max_len):
+  try:
+    with open(file_name, 'w') as f:
+      f.write(out_str[0:max_len])
+ except EnvironmentError, error:
+ logging.error('Writing file %s failed: %d: %s'%
+ (file_name,
+ error.errno,
+ error.strerror)
+ )
+
+def read_from_file(file_name, max_len):
+ try:
+ if os.path.isfile(file_name):
+ with open(file_name, 'r') as f:
+ return f.read(max_len)
+ except EnvironmentError, error:
+ logging.info('Loading file %s failed: %d: %s'%
+ (file_name,
+ error.errno,
+ error.strerror)
+ )
+ return None
+
+def load_possibly_compressed_response_json(response):
+ if response.info().get('Content-Encoding') == 'gzip':
+ buf = StringIO.StringIO( response.read() )
+ f = gzip.GzipFile(fileobj=buf)
+ return json.load(f)
+ else:
+ return json.load(response)
+
+def load_json_from_file(json_file_name):
+ # An exception here may be resolved by deleting the .last_modified
+ # and .json files, and re-running the script
+ try:
+ with open(json_file_name, 'r') as f:
+ return json.load(f)
+ except EnvironmentError, error:
+ raise Exception('Reading not-modified json file %s failed: %d: %s'%
+ (json_file_name,
+ error.errno,
+ error.strerror)
+ )
+
+## OnionOO Functions
+
+def datestr_to_datetime(datestr):
+ # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
+ if datestr is not None:
+ dt = dateutil.parser.parse(datestr)
+ else:
+ # Never modified - use start of epoch
+ dt = datetime.datetime.utcfromtimestamp(0)
+ # strip any timezone out (in case they're supported in future)
+ dt = dt.replace(tzinfo=None)
+ return dt
+
+def onionoo_fetch(what, **kwargs):
+ params = kwargs
+ params['type'] = 'relay'
+ #params['limit'] = 10
+ params['first_seen_days'] = '%d-'%(ADDRESS_AND_PORT_STABLE_DAYS,)
+ params['last_seen_days'] = '-7'
+ params['flag'] = 'V2Dir'
+ url = ONIONOO + what + '?' + urllib.urlencode(params)
+
+ # Unfortunately, the URL is too long for some OS filenames,
+ # but we still don't want to get files from different URLs mixed up
+ base_file_name = what + '-' + hashlib.sha1(url).hexdigest()
+
+ full_url_file_name = base_file_name + '.full_url'
+ MAX_FULL_URL_LENGTH = 1024
+
+ last_modified_file_name = base_file_name + '.last_modified'
+ MAX_LAST_MODIFIED_LENGTH = 64
+
+ json_file_name = base_file_name + '.json'
+
+ if LOCAL_FILES_ONLY:
+ # Read from the local file, don't write to anything
+ response_json = load_json_from_file(json_file_name)
+ else:
+ # store the full URL to a file for debugging
+ # no need to compare as long as you trust SHA-1
+ write_to_file(url, full_url_file_name, MAX_FULL_URL_LENGTH)
+
+ request = urllib2.Request(url)
+ request.add_header('Accept-encoding', 'gzip')
+
+ # load the last modified date from the file, if it exists
+ last_mod_date = read_from_file(last_modified_file_name,
+ MAX_LAST_MODIFIED_LENGTH)
+ if last_mod_date is not None:
+ request.add_header('If-modified-since', last_mod_date)
+
+ # Parse last modified date
+ last_mod = datestr_to_datetime(last_mod_date)
+
+ # Not Modified and still recent enough to be useful
+ # Onionoo / Globe used to use 6 hours, but we can afford a day
+ required_freshness = datetime.datetime.utcnow()
+ # strip any timezone out (to match dateutil.parser)
+ required_freshness = required_freshness.replace(tzinfo=None)
+ required_freshness -= datetime.timedelta(hours=24)
+
+ # Make the OnionOO request
+ response_code = 0
+ try:
+ response = urllib2.urlopen(request)
+ response_code = response.getcode()
+ except urllib2.HTTPError, error:
+ response_code = error.code
+ if response_code == 304: # not modified
+ pass
+ else:
+ raise Exception("Could not get " + url + ": "
+ + str(error.code) + ": " + error.reason)
+
+ if response_code == 200: # OK
+ last_mod = datestr_to_datetime(response.info().get('Last-Modified'))
+
+ # Check for freshness
+ if last_mod < required_freshness:
+ if last_mod_date is not None:
+ # This check sometimes fails transiently, retry the script if it does
+ date_message = "Outdated data: last updated " + last_mod_date
+ else:
+ date_message = "No data: never downloaded "
+ raise Exception(date_message + " from " + url)
+
+ # Process the data
+ if response_code == 200: # OK
+
+ response_json = load_possibly_compressed_response_json(response)
+
+ with open(json_file_name, 'w') as f:
+ # use the most compact json representation to save space
+ json.dump(response_json, f, separators=(',',':'))
+
+ # store the last modified date in its own file
+ if response.info().get('Last-modified') is not None:
+ write_to_file(response.info().get('Last-Modified'),
+ last_modified_file_name,
+ MAX_LAST_MODIFIED_LENGTH)
+
+ elif response_code == 304: # Not Modified
+
+ response_json = load_json_from_file(json_file_name)
+
+ else: # Unexpected HTTP response code not covered in the HTTPError above
+ raise Exception("Unexpected HTTP response code to " + url + ": "
+ + str(response_code))
+
+ register_fetch_source(what,
+ url,
+ response_json['relays_published'],
+ response_json['version'])
+
+ return response_json
+
+def fetch(what, **kwargs):
+ #x = onionoo_fetch(what, **kwargs)
+ # don't use sort_keys, as the order of or_addresses is significant
+ #print json.dumps(x, indent=4, separators=(',', ': '))
+ #sys.exit(0)
+
+ return onionoo_fetch(what, **kwargs)
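+
+# For example (as used in CandidateList below):
+#   d = fetch('details', fields='fingerprint,nickname')
+#   d['relays'] is then a list of relay details documents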
+
+## Fallback Candidate Class
+
+class Candidate(object):
+ CUTOFF_ADDRESS_AND_PORT_STABLE = (datetime.datetime.utcnow()
+ - datetime.timedelta(ADDRESS_AND_PORT_STABLE_DAYS))
+
+ def __init__(self, details):
+ for f in ['fingerprint', 'nickname', 'last_changed_address_or_port',
+ 'consensus_weight', 'or_addresses', 'dir_address']:
+ if not f in details: raise Exception("Document has no %s field."%(f,))
+
+ if not 'contact' in details:
+ details['contact'] = None
+ if not 'flags' in details or details['flags'] is None:
+ details['flags'] = []
+ if (not 'advertised_bandwidth' in details
+ or details['advertised_bandwidth'] is None):
+      # relays without advertised bandwidth have it calculated from their
+ # consensus weight
+ details['advertised_bandwidth'] = 0
+ if (not 'effective_family' in details
+ or details['effective_family'] is None):
+ details['effective_family'] = []
+ details['last_changed_address_or_port'] = parse_ts(
+ details['last_changed_address_or_port'])
+ self._data = details
+ self._stable_sort_or_addresses()
+
+ self._fpr = self._data['fingerprint']
+ self._running = self._guard = self._v2dir = 0.
+ self._split_dirport()
+ self._compute_orport()
+ if self.orport is None:
+ raise Exception("Failed to get an orport for %s."%(self._fpr,))
+ self._compute_ipv6addr()
+ if not self.has_ipv6():
+ logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
+
+ def _stable_sort_or_addresses(self):
+ # replace self._data['or_addresses'] with a stable ordering,
+ # sorting the secondary addresses in string order
+ # leave the received order in self._data['or_addresses_raw']
+ self._data['or_addresses_raw'] = self._data['or_addresses']
+ or_address_primary = self._data['or_addresses'][:1]
+ # subsequent entries in the or_addresses array are in an arbitrary order
+ # so we stabilise the addresses by sorting them in string order
+ or_addresses_secondaries_stable = sorted(self._data['or_addresses'][1:])
+ or_addresses_stable = or_address_primary + or_addresses_secondaries_stable
+ self._data['or_addresses'] = or_addresses_stable
+
+ def get_fingerprint(self):
+ return self._fpr
+
+ # is_valid_ipv[46]_address by gsathya, karsten, 2013
+ @staticmethod
+ def is_valid_ipv4_address(address):
+ if not isinstance(address, (str, unicode)):
+ return False
+
+ # check if there are four period separated values
+ if address.count(".") != 3:
+ return False
+
+    # check that each octet is a decimal value between 0 and 255
+ for entry in address.split("."):
+ if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
+ return False
+ elif entry[0] == "0" and len(entry) > 1:
+ return False # leading zeros, for instance in "1.2.3.001"
+
+ return True
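+
+  # For example (illustrative):
+  #   is_valid_ipv4_address('192.0.2.1') returns True
+  #   is_valid_ipv4_address('1.2.3.001') returns False (leading zero)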
+
+ @staticmethod
+ def is_valid_ipv6_address(address):
+ if not isinstance(address, (str, unicode)):
+ return False
+
+ # remove brackets
+ address = address[1:-1]
+
+ # addresses are made up of eight colon separated groups of four hex digits
+ # with leading zeros being optional
+ # https://en.wikipedia.org/wiki/IPv6#Address_format
+
+ colon_count = address.count(":")
+
+ if colon_count > 7:
+ return False # too many groups
+ elif colon_count != 7 and not "::" in address:
+ return False # not enough groups and none are collapsed
+ elif address.count("::") > 1 or ":::" in address:
+ return False # multiple groupings of zeros can't be collapsed
+
+ found_ipv4_on_previous_entry = False
+ for entry in address.split(":"):
+ # If an IPv6 address has an embedded IPv4 address,
+ # it must be the last entry
+ if found_ipv4_on_previous_entry:
+ return False
+      if not re.match("^[0-9a-fA-F]{0,4}$", entry):
+ if not Candidate.is_valid_ipv4_address(entry):
+ return False
+ else:
+ found_ipv4_on_previous_entry = True
+
+ return True
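+
+  # For example (illustrative):
+  #   is_valid_ipv6_address('[2001:db8::1]') returns True
+  #   is_valid_ipv6_address('[2001:db8]')    returns False (too few groups)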
+
+ def _split_dirport(self):
+ # Split the dir_address into dirip and dirport
+    (self.dirip, _dirport) = self._data['dir_address'].split(':', 1)
+ self.dirport = int(_dirport)
+
+ def _compute_orport(self):
+ # Choose the first ORPort that's on the same IPv4 address as the DirPort.
+ # In rare circumstances, this might not be the primary ORPort address.
+ # However, _stable_sort_or_addresses() ensures we choose the same one
+ # every time, even if onionoo changes the order of the secondaries.
+ self._split_dirport()
+ self.orport = None
+ for i in self._data['or_addresses']:
+ if i != self._data['or_addresses'][0]:
+ logging.debug('Secondary IPv4 Address Used for %s: %s'%(self._fpr, i))
+ (ipaddr, port) = i.rsplit(':', 1)
+ if (ipaddr == self.dirip) and Candidate.is_valid_ipv4_address(ipaddr):
+ self.orport = int(port)
+ return
+
+ def _compute_ipv6addr(self):
+ # Choose the first IPv6 address that uses the same port as the ORPort
+ # Or, choose the first IPv6 address in the list
+ # _stable_sort_or_addresses() ensures we choose the same IPv6 address
+ # every time, even if onionoo changes the order of the secondaries.
+ self.ipv6addr = None
+ self.ipv6orport = None
+ # Choose the first IPv6 address that uses the same port as the ORPort
+ for i in self._data['or_addresses']:
+ (ipaddr, port) = i.rsplit(':', 1)
+      # port is a string here, orport is an int, so convert before comparing
+      if (int(port) == self.orport) and Candidate.is_valid_ipv6_address(ipaddr):
+ self.ipv6addr = ipaddr
+ self.ipv6orport = int(port)
+ return
+ # Choose the first IPv6 address in the list
+ for i in self._data['or_addresses']:
+ (ipaddr, port) = i.rsplit(':', 1)
+ if Candidate.is_valid_ipv6_address(ipaddr):
+ self.ipv6addr = ipaddr
+ self.ipv6orport = int(port)
+ return
+
+ @staticmethod
+ def _extract_generic_history(history, which='unknown'):
+ # given a tree like this:
+ # {
+ # "1_month": {
+ # "count": 187,
+ # "factor": 0.001001001001001001,
+ # "first": "2015-02-27 06:00:00",
+ # "interval": 14400,
+ # "last": "2015-03-30 06:00:00",
+ # "values": [
+ # 999,
+ # 999
+ # ]
+ # },
+ # "1_week": {
+ # "count": 169,
+ # "factor": 0.001001001001001001,
+ # "first": "2015-03-23 07:30:00",
+ # "interval": 3600,
+ # "last": "2015-03-30 07:30:00",
+ # "values": [ ...]
+ # },
+ # "1_year": {
+ # "count": 177,
+ # "factor": 0.001001001001001001,
+ # "first": "2014-04-11 00:00:00",
+ # "interval": 172800,
+ # "last": "2015-03-29 00:00:00",
+ # "values": [ ...]
+ # },
+ # "3_months": {
+ # "count": 185,
+ # "factor": 0.001001001001001001,
+ # "first": "2014-12-28 06:00:00",
+ # "interval": 43200,
+ # "last": "2015-03-30 06:00:00",
+ # "values": [ ...]
+ # }
+ # },
+ # extract exactly one piece of data per time interval,
+ # using smaller intervals where available.
+ #
+ # returns list of (age, length, value) dictionaries.
+
+ generic_history = []
+
+ periods = history.keys()
+ periods.sort(key = lambda x: history[x]['interval'])
+ now = datetime.datetime.utcnow()
+ newest = now
+ for p in periods:
+ h = history[p]
+ interval = datetime.timedelta(seconds = h['interval'])
+ this_ts = parse_ts(h['last'])
+
+ if (len(h['values']) != h['count']):
+ logging.warning('Inconsistent value count in %s document for %s'
+ %(p, which))
+ for v in reversed(h['values']):
+ if (this_ts <= newest):
+ agt1 = now - this_ts
+ agt2 = interval
+ agetmp1 = (agt1.microseconds + (agt1.seconds + agt1.days * 24 * 3600)
+ * 10**6) / 10**6
+ agetmp2 = (agt2.microseconds + (agt2.seconds + agt2.days * 24 * 3600)
+ * 10**6) / 10**6
+ generic_history.append(
+ { 'age': agetmp1,
+ 'length': agetmp2,
+ 'value': v
+ })
+ newest = this_ts
+ this_ts -= interval
+
+ if (this_ts + interval != parse_ts(h['first'])):
+ logging.warning('Inconsistent time information in %s document for %s'
+ %(p, which))
+
+ #print json.dumps(generic_history, sort_keys=True,
+ # indent=4, separators=(',', ': '))
+ return generic_history
+
+ @staticmethod
+ def _avg_generic_history(generic_history):
+ a = []
+ for i in generic_history:
+ if i['age'] > (ADDRESS_AND_PORT_STABLE_DAYS * 24 * 3600):
+ continue
+ if (i['length'] is not None
+ and i['age'] is not None
+ and i['value'] is not None):
+ w = i['length'] * math.pow(AGE_ALPHA, i['age']/(3600*24))
+ a.append( (i['value'] * w, w) )
+
+ sv = math.fsum(map(lambda x: x[0], a))
+ sw = math.fsum(map(lambda x: x[1], a))
+
+ if sw == 0.0:
+ svw = 0.0
+ else:
+ svw = sv/sw
+ return svw
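+
+  # For example (illustrative): a value spanning a 3600 s interval that is
+  # 7 days old gets weight w = 3600 * 0.99**7, or about 3355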
+
+ def _add_generic_history(self, history):
+    periods = history.keys()
+    periods.sort(key = lambda x: history[x]['interval'] )
+
+ print periods
+
+ def add_running_history(self, history):
+ pass
+
+ def add_uptime(self, uptime):
+ logging.debug('Adding uptime %s.'%(self._fpr,))
+
+ # flags we care about: Running, V2Dir, Guard
+ if not 'flags' in uptime:
+ logging.debug('No flags in document for %s.'%(self._fpr,))
+ return
+
+ for f in ['Running', 'Guard', 'V2Dir']:
+ if not f in uptime['flags']:
+ logging.debug('No %s in flags for %s.'%(f, self._fpr,))
+ return
+
+ running = self._extract_generic_history(uptime['flags']['Running'],
+ '%s-Running'%(self._fpr))
+ guard = self._extract_generic_history(uptime['flags']['Guard'],
+ '%s-Guard'%(self._fpr))
+ v2dir = self._extract_generic_history(uptime['flags']['V2Dir'],
+ '%s-V2Dir'%(self._fpr))
+ if 'BadExit' in uptime['flags']:
+ badexit = self._extract_generic_history(uptime['flags']['BadExit'],
+ '%s-BadExit'%(self._fpr))
+
+ self._running = self._avg_generic_history(running) / ONIONOO_SCALE_ONE
+ self._guard = self._avg_generic_history(guard) / ONIONOO_SCALE_ONE
+ self._v2dir = self._avg_generic_history(v2dir) / ONIONOO_SCALE_ONE
+ self._badexit = None
+ if 'BadExit' in uptime['flags']:
+ self._badexit = self._avg_generic_history(badexit) / ONIONOO_SCALE_ONE
+
+ def is_candidate(self):
+ must_be_running_now = (PERFORM_IPV4_DIRPORT_CHECKS
+ or PERFORM_IPV6_DIRPORT_CHECKS)
+ if (must_be_running_now and not self.is_running()):
+ logging.info('%s not a candidate: not running now, unable to check ' +
+ 'DirPort consensus download', self._fpr)
+ return False
+ if (self._data['last_changed_address_or_port'] >
+ self.CUTOFF_ADDRESS_AND_PORT_STABLE):
+ logging.info('%s not a candidate: changed address/port recently (%s)',
+ self._fpr, self._data['last_changed_address_or_port'])
+ return False
+ if self._running < CUTOFF_RUNNING:
+ logging.info('%s not a candidate: running avg too low (%lf)',
+ self._fpr, self._running)
+ return False
+ if self._v2dir < CUTOFF_V2DIR:
+ logging.info('%s not a candidate: v2dir avg too low (%lf)',
+ self._fpr, self._v2dir)
+ return False
+ if self._badexit is not None and self._badexit > PERMITTED_BADEXIT:
+ logging.info('%s not a candidate: badexit avg too high (%lf)',
+ self._fpr, self._badexit)
+ return False
+ # if the relay doesn't report a version, also exclude the relay
+ if (not self._data.has_key('recommended_version')
+ or not self._data['recommended_version']):
+ logging.info('%s not a candidate: version not recommended', self._fpr)
+ return False
+ if self._guard < CUTOFF_GUARD:
+ logging.info('%s not a candidate: guard avg too low (%lf)',
+ self._fpr, self._guard)
+ return False
+ if (not self._data.has_key('consensus_weight')
+ or self._data['consensus_weight'] < 1):
+ logging.info('%s not a candidate: consensus weight invalid', self._fpr)
+ return False
+ return True
+
+ def is_in_whitelist(self, relaylist):
+ """ A fallback matches if each key in the whitelist line matches:
+ ipv4
+ dirport
+ orport
+ id
+ ipv6 address and port (if present)
+ If the fallback has an ipv6 key, the whitelist line must also have
+ it, and vice versa, otherwise they don't match. """
+ ipv6 = None
+ if self.has_ipv6():
+ ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+ for entry in relaylist:
+ if entry['id'] != self._fpr:
+ # can't log here unless we match an IP and port, because every relay's
+ # fingerprint is compared to every entry's fingerprint
+ if entry['ipv4'] == self.dirip and int(entry['orport']) == self.orport:
+ logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
+ '%s?', entry['id'], self.dirip, self.orport,
+ self._fpr)
+ if self.has_ipv6() and entry.has_key('ipv6') and entry['ipv6'] == ipv6:
+ logging.warning('%s excluded: has OR %s changed fingerprint to ' +
+ '%s?', entry['id'], ipv6, self._fpr)
+ continue
+ if entry['ipv4'] != self.dirip:
+ logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
+ self._fpr, entry['ipv4'], self.dirip)
+ continue
+ if int(entry['dirport']) != self.dirport:
+ logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
+ '%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
+ self.dirip, self.dirport)
+ continue
+ if int(entry['orport']) != self.orport:
+ logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
+ '%s:%d?', self._fpr, self.dirip, int(entry['orport']),
+ self.dirip, self.orport)
+ continue
+ if entry.has_key('ipv6') and self.has_ipv6():
+ # if both entry and fallback have an ipv6 address, compare them
+ if entry['ipv6'] != ipv6:
+ logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
+ 'to %s?', self._fpr, entry['ipv6'], ipv6)
+ continue
+ # if the fallback has an IPv6 address but the whitelist entry
+ # doesn't, or vice versa, the whitelist entry doesn't match
+ elif entry.has_key('ipv6') and not self.has_ipv6():
+ logging.warning('%s excluded: has it lost its former IPv6 address %s?',
+ self._fpr, entry['ipv6'])
+ continue
+ elif not entry.has_key('ipv6') and self.has_ipv6():
+ logging.warning('%s excluded: has it gained an IPv6 address %s?',
+ self._fpr, ipv6)
+ continue
+ return True
+ return False
+
+ def is_in_blacklist(self, relaylist):
+ """ A fallback matches a blacklist line if a sufficiently specific group
+ of attributes matches:
+ ipv4 & dirport
+ ipv4 & orport
+ id
+ ipv6 & dirport
+ ipv6 & ipv6 orport
+ If the fallback and the blacklist line both have an ipv6 key,
+ their values will be compared, otherwise, they will be ignored.
+    If there is no dirport and no orport, the entry matches all relays on
+    that IP address. """
+ for entry in relaylist:
+ for key in entry:
+ value = entry[key]
+ if key == 'id' and value == self._fpr:
+ logging.info('%s is in the blacklist: fingerprint matches',
+ self._fpr)
+ return True
+ if key == 'ipv4' and value == self.dirip:
+ # if the dirport is present, check it too
+ if entry.has_key('dirport'):
+ if int(entry['dirport']) == self.dirport:
+ logging.info('%s is in the blacklist: IPv4 (%s) and ' +
+ 'DirPort (%d) match', self._fpr, self.dirip,
+ self.dirport)
+ return True
+ # if the orport is present, check it too
+ elif entry.has_key('orport'):
+ if int(entry['orport']) == self.orport:
+ logging.info('%s is in the blacklist: IPv4 (%s) and ' +
+ 'ORPort (%d) match', self._fpr, self.dirip,
+ self.orport)
+ return True
+ else:
+ logging.info('%s is in the blacklist: IPv4 (%s) matches, and ' +
+ 'entry has no DirPort or ORPort', self._fpr,
+ self.dirip)
+ return True
+ ipv6 = None
+ if self.has_ipv6():
+ ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
+ if (key == 'ipv6' and self.has_ipv6()):
+ # if both entry and fallback have an ipv6 address, compare them,
+ # otherwise, disregard ipv6 addresses
+ if value == ipv6:
+ # if the dirport is present, check it too
+ if entry.has_key('dirport'):
+ if int(entry['dirport']) == self.dirport:
+ logging.info('%s is in the blacklist: IPv6 (%s) and ' +
+ 'DirPort (%d) match', self._fpr, ipv6,
+ self.dirport)
+ return True
+ # we've already checked the ORPort, it's part of entry['ipv6']
+ else:
+            logging.info('%s is in the blacklist: IPv6 (%s) matches, and ' +
+                        'entry has no DirPort', self._fpr, ipv6)
+ return True
+ elif (key == 'ipv6' or self.has_ipv6()):
+ # only log if the fingerprint matches but the IPv6 doesn't
+ if entry.has_key('id') and entry['id'] == self._fpr:
+ logging.info('%s skipping IPv6 blacklist comparison: relay ' +
+ 'has%s IPv6%s, but entry has%s IPv6%s', self._fpr,
+ '' if self.has_ipv6() else ' no',
+ (' (' + ipv6 + ')') if self.has_ipv6() else '',
+ '' if key == 'ipv6' else ' no',
+ (' (' + value + ')') if key == 'ipv6' else '')
+ logging.warning('Has %s %s IPv6 address %s?', self._fpr,
+ 'gained an' if self.has_ipv6() else 'lost its former',
+ ipv6 if self.has_ipv6() else value)
+ return False
+
+ def cw_to_bw_factor(self):
+ # any relays with a missing or zero consensus weight are not candidates
+ # any relays with a missing advertised bandwidth have it set to zero
+ return self._data['advertised_bandwidth'] / self._data['consensus_weight']
+
+ # since advertised_bandwidth is reported by the relay, it can be gamed
+ # to avoid this, use the median consensus weight to bandwidth factor to
+ # estimate this relay's measured bandwidth, and make that the upper limit
+ def measured_bandwidth(self, median_cw_to_bw_factor):
+    cw_to_bw = median_cw_to_bw_factor
+ # Reduce exit bandwidth to make sure we're not overloading them
+ if self.is_exit():
+ cw_to_bw *= EXIT_BANDWIDTH_FRACTION
+ measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
+ if self._data['advertised_bandwidth'] != 0:
+ # limit advertised bandwidth (if available) to measured bandwidth
+ return min(measured_bandwidth, self._data['advertised_bandwidth'])
+ else:
+ return measured_bandwidth
+
+ def set_measured_bandwidth(self, median_cw_to_bw_factor):
+ self._data['measured_bandwidth'] = self.measured_bandwidth(
+ median_cw_to_bw_factor)
+
+ def is_exit(self):
+ return 'Exit' in self._data['flags']
+
+ def is_guard(self):
+ return 'Guard' in self._data['flags']
+
+ def is_running(self):
+ return 'Running' in self._data['flags']
+
+ # does this fallback have an IPv6 address and orport?
+ def has_ipv6(self):
+ return self.ipv6addr is not None and self.ipv6orport is not None
+
+ # strip leading and trailing brackets from an IPv6 address
+ # safe to use on non-bracketed IPv6 and on IPv4 addresses
+ # also convert to unicode, and make None appear as ''
+ @staticmethod
+ def strip_ipv6_brackets(ip):
+ if ip is None:
+ return unicode('')
+ if len(ip) < 2:
+ return unicode(ip)
+ if ip[0] == '[' and ip[-1] == ']':
+ return unicode(ip[1:-1])
+ return unicode(ip)
+
+ # are ip_a and ip_b in the same netblock?
+ # mask_bits is the size of the netblock
+ # takes both IPv4 and IPv6 addresses
+ # the versions of ip_a and ip_b must be the same
+ # the mask must be valid for the IP version
+ @staticmethod
+ def netblocks_equal(ip_a, ip_b, mask_bits):
+ if ip_a is None or ip_b is None:
+ return False
+ ip_a = Candidate.strip_ipv6_brackets(ip_a)
+ ip_b = Candidate.strip_ipv6_brackets(ip_b)
+ a = ipaddress.ip_address(ip_a)
+ b = ipaddress.ip_address(ip_b)
+ if a.version != b.version:
+ raise Exception('Mismatching IP versions in %s and %s'%(ip_a, ip_b))
+ if mask_bits > a.max_prefixlen:
+ logging.error('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
+ mask_bits = a.max_prefixlen
+ if mask_bits < 0:
+ logging.error('Bad IP mask %d for %s and %s'%(mask_bits, ip_a, ip_b))
+ mask_bits = 0
+ a_net = ipaddress.ip_network('%s/%d'%(ip_a, mask_bits), strict=False)
+ return b in a_net
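+
+  # For example (illustrative):
+  #   netblocks_equal('192.0.2.1', '192.0.2.200', 24)  returns True
+  #   netblocks_equal('192.0.2.1', '198.51.100.1', 24) returns False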
+
+ # is this fallback's IPv4 address (dirip) in the same netblock as other's
+ # IPv4 address?
+ # mask_bits is the size of the netblock
+ def ipv4_netblocks_equal(self, other, mask_bits):
+ return Candidate.netblocks_equal(self.dirip, other.dirip, mask_bits)
+
+ # is this fallback's IPv6 address (ipv6addr) in the same netblock as
+ # other's IPv6 address?
+ # Returns False if either fallback has no IPv6 address
+ # mask_bits is the size of the netblock
+ def ipv6_netblocks_equal(self, other, mask_bits):
+ if not self.has_ipv6() or not other.has_ipv6():
+ return False
+ return Candidate.netblocks_equal(self.ipv6addr, other.ipv6addr, mask_bits)
+
+ # is this fallback's IPv4 DirPort the same as other's IPv4 DirPort?
+ def dirport_equal(self, other):
+ return self.dirport == other.dirport
+
+ # is this fallback's IPv4 ORPort the same as other's IPv4 ORPort?
+ def ipv4_orport_equal(self, other):
+ return self.orport == other.orport
+
+ # is this fallback's IPv6 ORPort the same as other's IPv6 ORPort?
+ # Returns False if either fallback has no IPv6 address
+ def ipv6_orport_equal(self, other):
+ if not self.has_ipv6() or not other.has_ipv6():
+ return False
+ return self.ipv6orport == other.ipv6orport
+
+ # does this fallback have the same DirPort, IPv4 ORPort, or
+ # IPv6 ORPort as other?
+ # Ignores IPv6 ORPort if either fallback has no IPv6 address
+ def port_equal(self, other):
+ return (self.dirport_equal(other) or self.ipv4_orport_equal(other)
+ or self.ipv6_orport_equal(other))
+
+ # return a list containing IPv4 ORPort, DirPort, and IPv6 ORPort (if present)
+ def port_list(self):
+ ports = [self.dirport, self.orport]
+    if self.has_ipv6() and self.ipv6orport not in ports:
+ ports.append(self.ipv6orport)
+ return ports
+
+ # does this fallback share a port with other, regardless of whether the
+ # port types match?
+ # For example, if self's IPv4 ORPort is 80 and other's DirPort is 80,
+ # return True
+ def port_shared(self, other):
+ for p in self.port_list():
+ if p in other.port_list():
+ return True
+ return False
+
+ # report how long it takes to download a consensus from dirip:dirport
+ @staticmethod
+ def fallback_consensus_download_speed(dirip, dirport, nickname, max_time):
+ download_failed = False
+ downloader = DescriptorDownloader()
+ start = datetime.datetime.utcnow()
+ # some directory mirrors respond to requests in ways that hang python
+ # sockets, which is why we log this line here
+ logging.info('Initiating consensus download from %s (%s:%d).', nickname,
+ dirip, dirport)
+ # there appears to be about 1 second of overhead when comparing stem's
+ # internal trace time and the elapsed time calculated here
+ TIMEOUT_SLOP = 1.0
+ try:
+ downloader.get_consensus(endpoints = [(dirip, dirport)],
+ timeout = (max_time + TIMEOUT_SLOP),
+ validate = True,
+ retries = 0,
+ fall_back_to_authority = False).run()
+ except Exception, stem_error:
+ logging.info('Unable to retrieve a consensus from %s: %s', nickname,
+ stem_error)
+ status = 'error: "%s"' % (stem_error)
+ level = logging.WARNING
+ download_failed = True
+ elapsed = (datetime.datetime.utcnow() - start).total_seconds()
+ if elapsed > max_time:
+ status = 'too slow'
+ level = logging.WARNING
+ download_failed = True
+ else:
+ status = 'ok'
+ level = logging.DEBUG
+ logging.log(level, 'Consensus download: %0.1fs %s from %s (%s:%d), ' +
+ 'max download time %0.1fs.', elapsed, status, nickname,
+ dirip, dirport, max_time)
+ return download_failed
+
+ # does this fallback download the consensus fast enough?
+ def check_fallback_download_consensus(self):
+ # include the relay if we're not doing a check, or we can't check (IPv6)
+ ipv4_failed = False
+ ipv6_failed = False
+ if PERFORM_IPV4_DIRPORT_CHECKS:
+ ipv4_failed = Candidate.fallback_consensus_download_speed(self.dirip,
+ self.dirport,
+ self._data['nickname'],
+ CONSENSUS_DOWNLOAD_SPEED_MAX)
+ if self.has_ipv6() and PERFORM_IPV6_DIRPORT_CHECKS:
+ # Clients assume the IPv6 DirPort is the same as the IPv4 DirPort
+ ipv6_failed = Candidate.fallback_consensus_download_speed(self.ipv6addr,
+ self.dirport,
+ self._data['nickname'],
+ CONSENSUS_DOWNLOAD_SPEED_MAX)
+ return ((not ipv4_failed) and (not ipv6_failed))
+
+ # if this fallback has not passed a download check, try it again,
+ # and record the result, available in get_fallback_download_consensus
+ def try_fallback_download_consensus(self):
+ if not self.get_fallback_download_consensus():
+ self._data['download_check'] = self.check_fallback_download_consensus()
+
+ # did this fallback pass the download check?
+ def get_fallback_download_consensus(self):
+ # if we're not performing checks, return True
+ if not PERFORM_IPV4_DIRPORT_CHECKS and not PERFORM_IPV6_DIRPORT_CHECKS:
+ return True
+ # if we are performing checks, but haven't done one, return False
+ if not self._data.has_key('download_check'):
+ return False
+ return self._data['download_check']
+
+ # output an optional header comment and info for this fallback
+ # try_fallback_download_consensus before calling this
+ def fallbackdir_line(self, fallbacks, prefilter_fallbacks):
+ s = ''
+ if OUTPUT_COMMENTS:
+ s += self.fallbackdir_comment(fallbacks, prefilter_fallbacks)
+ # if the download speed is ok, output a C string
+ # if it's not, but we OUTPUT_COMMENTS, output a commented-out C string
+ if self.get_fallback_download_consensus() or OUTPUT_COMMENTS:
+ s += self.fallbackdir_info(self.get_fallback_download_consensus())
+ return s
+
+ # output a header comment for this fallback
+ def fallbackdir_comment(self, fallbacks, prefilter_fallbacks):
+ # /*
+ # nickname
+ # flags
+ # [contact]
+ # [identical contact counts]
+ # */
+ # Multiline C comment
+ s = '/*'
+ s += '\n'
+ s += cleanse_c_multiline_comment(self._data['nickname'])
+ s += '\n'
+ s += 'Flags: '
+ s += cleanse_c_multiline_comment(' '.join(sorted(self._data['flags'])))
+ s += '\n'
+ if self._data['contact'] is not None:
+ s += cleanse_c_multiline_comment(self._data['contact'])
+ if CONTACT_COUNT or CONTACT_BLACKLIST_COUNT:
+ fallback_count = len([f for f in fallbacks
+ if f._data['contact'] == self._data['contact']])
+ if fallback_count > 1:
+ s += '\n'
+ s += '%d identical contacts listed' % (fallback_count)
+ if CONTACT_BLACKLIST_COUNT:
+ prefilter_count = len([f for f in prefilter_fallbacks
+ if f._data['contact'] == self._data['contact']])
+ filter_count = prefilter_count - fallback_count
+ if filter_count > 0:
+ if fallback_count > 1:
+ s += ' '
+ else:
+ s += '\n'
+ s += '%d blacklisted' % (filter_count)
+ s += '\n'
+ s += '*/'
+    s += '\n'
+    return s
+
+ # output the fallback info C string for this fallback
+ # this is the text that would go after FallbackDir in a torrc
+ # if this relay failed the download test and we OUTPUT_COMMENTS,
+ # comment-out the returned string
+ def fallbackdir_info(self, dl_speed_ok):
+ # "address:dirport orport=port id=fingerprint"
+ # "[ipv6=addr:orport]"
+ # "weight=FALLBACK_OUTPUT_WEIGHT",
+ #
+ # Do we want a C string, or a commented-out string?
+ c_string = dl_speed_ok
+ comment_string = not dl_speed_ok and OUTPUT_COMMENTS
+ # If we don't want either kind of string, bail
+ if not c_string and not comment_string:
+ return ''
+ s = ''
+ # Comment out the fallback directory entry if it's too slow
+ # See the debug output for which address and port is failing
+ if comment_string:
+ s += '/* Consensus download failed or was too slow:\n'
+ # Multi-Line C string with trailing comma (part of a string list)
+ # This makes it easier to diff the file, and remove IPv6 lines using grep
+ # Integers don't need escaping
+ s += '"%s orport=%d id=%s"'%(
+ cleanse_c_string(self._data['dir_address']),
+ self.orport,
+ cleanse_c_string(self._fpr))
+ s += '\n'
+ if self.has_ipv6():
+ s += '" ipv6=%s:%d"'%(cleanse_c_string(self.ipv6addr), self.ipv6orport)
+ s += '\n'
+ s += '" weight=%d",'%(FALLBACK_OUTPUT_WEIGHT)
+ if comment_string:
+ s += '\n'
+ s += '*/'
+ return s
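+
+  # For example, fallbackdir_info(True) produces output like (illustrative):
+  #   "192.0.2.1:80 orport=443 id=<40-hex fingerprint>"
+  #   " weight=10",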
+
+## Fallback Candidate List Class
+
+class CandidateList(dict):
+ def __init__(self):
+ pass
+
+ def _add_relay(self, details):
+ if not 'dir_address' in details: return
+ c = Candidate(details)
+ self[ c.get_fingerprint() ] = c
+
+ def _add_uptime(self, uptime):
+ try:
+ fpr = uptime['fingerprint']
+ except KeyError:
+ raise Exception("Document has no fingerprint field.")
+
+ try:
+ c = self[fpr]
+ except KeyError:
+ logging.debug('Got unknown relay %s in uptime document.'%(fpr,))
+ return
+
+ c.add_uptime(uptime)
+
+ def _add_details(self):
+ logging.debug('Loading details document.')
+ d = fetch('details',
+ fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
+ 'consensus_weight,advertised_bandwidth,or_addresses,' +
+ 'dir_address,recommended_version,flags,effective_family'))
+ logging.debug('Loading details document done.')
+
+ if not 'relays' in d: raise Exception("No relays found in document.")
+
+ for r in d['relays']: self._add_relay(r)
+
+ def _add_uptimes(self):
+ logging.debug('Loading uptime document.')
+ d = fetch('uptime')
+ logging.debug('Loading uptime document done.')
+
+ if not 'relays' in d: raise Exception("No relays found in document.")
+ for r in d['relays']: self._add_uptime(r)
+
+ def add_relays(self):
+ self._add_details()
+ self._add_uptimes()
+
+ def count_guards(self):
+ guard_count = 0
+ for fpr in self.keys():
+ if self[fpr].is_guard():
+ guard_count += 1
+ return guard_count
+
+ # Find fallbacks that fit the uptime, stability, and flags criteria,
+ # and make an array of them in self.fallbacks
+ def compute_fallbacks(self):
+ self.fallbacks = map(lambda x: self[x],
+ filter(lambda x: self[x].is_candidate(),
+ self.keys()))
+
+ # sort fallbacks by their consensus weight to advertised bandwidth factor,
+ # lowest to highest
+ # used to find the median cw_to_bw_factor()
+ def sort_fallbacks_by_cw_to_bw_factor(self):
+ self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
+
+ # sort fallbacks by their measured bandwidth, highest to lowest
+ # calculate_measured_bandwidth before calling this
+ # this is useful for reviewing candidates in priority order
+ def sort_fallbacks_by_measured_bandwidth(self):
+ self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
+ reverse=True)
+
+ # sort fallbacks by their fingerprint, lowest to highest
+ # this is useful for stable diffs of fallback lists
+ def sort_fallbacks_by_fingerprint(self):
+ self.fallbacks.sort(key=lambda f: f._fpr)
+
+ @staticmethod
+ def load_relaylist(file_name):
+ """ Read each line in the file, and parse it like a FallbackDir line:
+ an IPv4 address and optional port:
+ <IPv4 address>:<port>
+ which are parsed into dictionary entries:
+ ipv4=<IPv4 address>
+ dirport=<port>
+ followed by a series of key=value entries:
+ orport=<port>
+ id=<fingerprint>
+ ipv6=<IPv6 address>:<IPv6 orport>
+    each line's key/value pairs are placed in a dictionary
+ (of string -> string key/value pairs),
+ and these dictionaries are placed in an array.
+ comments start with # and are ignored """
+ relaylist = []
+ file_data = read_from_file(file_name, MAX_LIST_FILE_SIZE)
+ if file_data is None:
+ return relaylist
+ for line in file_data.split('\n'):
+ relay_entry = {}
+ # ignore comments
+ line_comment_split = line.split('#')
+ line = line_comment_split[0]
+ # cleanup whitespace
+ line = cleanse_whitespace(line)
+ line = line.strip()
+ if len(line) == 0:
+ continue
+ for item in line.split(' '):
+ item = item.strip()
+ if len(item) == 0:
+ continue
+ key_value_split = item.split('=')
+ kvl = len(key_value_split)
+ if kvl < 1 or kvl > 2:
+ print '#error Bad %s item: %s, format is key=value.'%(
+ file_name, item)
+ if kvl == 1:
+ # assume that entries without a key are the ipv4 address,
+ # perhaps with a dirport
+ ipv4_maybe_dirport = key_value_split[0]
+ ipv4_maybe_dirport_split = ipv4_maybe_dirport.split(':')
+ dirl = len(ipv4_maybe_dirport_split)
+ if dirl < 1 or dirl > 2:
+ print '#error Bad %s IPv4 item: %s, format is ipv4:port.'%(
+ file_name, item)
+ if dirl >= 1:
+ relay_entry['ipv4'] = ipv4_maybe_dirport_split[0]
+ if dirl == 2:
+ relay_entry['dirport'] = ipv4_maybe_dirport_split[1]
+ elif kvl == 2:
+ relay_entry[key_value_split[0]] = key_value_split[1]
+ relaylist.append(relay_entry)
+ return relaylist
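+
+  # For example, a (hypothetical) line:
+  #   192.0.2.1:80 orport=443 id=<fingerprint>
+  # parses to:
+  #   {'ipv4': '192.0.2.1', 'dirport': '80',
+  #    'orport': '443', 'id': '<fingerprint>'}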
+
+ # apply the fallback whitelist and blacklist
+ def apply_filter_lists(self):
+ excluded_count = 0
+ logging.debug('Applying whitelist and blacklist.')
+ # parse the whitelist and blacklist
+ whitelist = self.load_relaylist(WHITELIST_FILE_NAME)
+ blacklist = self.load_relaylist(BLACKLIST_FILE_NAME)
+ filtered_fallbacks = []
+ for f in self.fallbacks:
+ in_whitelist = f.is_in_whitelist(whitelist)
+ in_blacklist = f.is_in_blacklist(blacklist)
+ if in_whitelist and in_blacklist:
+ if BLACKLIST_EXCLUDES_WHITELIST_ENTRIES:
+ # exclude
+ excluded_count += 1
+ logging.warning('Excluding %s: in both blacklist and whitelist.',
+ f._fpr)
+ else:
+ # include
+ filtered_fallbacks.append(f)
+ elif in_whitelist:
+ # include
+ filtered_fallbacks.append(f)
+ elif in_blacklist:
+ # exclude
+ excluded_count += 1
+ logging.info('Excluding %s: in blacklist.', f._fpr)
+ else:
+ if INCLUDE_UNLISTED_ENTRIES:
+ # include
+ filtered_fallbacks.append(f)
+ else:
+ # exclude
+ excluded_count += 1
+ logging.info('Excluding %s: in neither blacklist nor whitelist.',
+ f._fpr)
+ self.fallbacks = filtered_fallbacks
+ return excluded_count
+
+ @staticmethod
+ def summarise_filters(initial_count, excluded_count):
+ return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
+ excluded_count, initial_count)
+
+ # calculate each fallback's measured bandwidth based on the median
+  # consensus weight to advertised bandwidth ratio
+ def calculate_measured_bandwidth(self):
+ self.sort_fallbacks_by_cw_to_bw_factor()
+ median_fallback = self.fallback_median(True)
+ if median_fallback is not None:
+ median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
+ else:
+ # this will never be used, because there are no fallbacks
+ median_cw_to_bw_factor = None
+ for f in self.fallbacks:
+ f.set_measured_bandwidth(median_cw_to_bw_factor)
+
+ # remove relays with low measured bandwidth from the fallback list
+ # calculate_measured_bandwidth for each relay before calling this
+ def remove_low_bandwidth_relays(self):
+ if MIN_BANDWIDTH is None:
+ return
+ above_min_bw_fallbacks = []
+ for f in self.fallbacks:
+ if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
+ above_min_bw_fallbacks.append(f)
+ else:
+ # the bandwidth we log here is limited by the relay's consensus weight
+        # as well as its advertised bandwidth. See set_measured_bandwidth
+ # for details
+ logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' +
+ 'be at least %.1fMB/s', f._fpr,
+ f._data['measured_bandwidth']/(1024.0*1024.0),
+ MIN_BANDWIDTH/(1024.0*1024.0))
+ self.fallbacks = above_min_bw_fallbacks
+
+ # the minimum fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ def fallback_min(self):
+ if len(self.fallbacks) > 0:
+ return self.fallbacks[-1]
+ else:
+ return None
+
+ # the median fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ def fallback_median(self, require_advertised_bandwidth):
+    # use the low-median when there is an even number of fallbacks,
+ # for consistency with the bandwidth authorities
+ if len(self.fallbacks) > 0:
+ median_position = (len(self.fallbacks) - 1) / 2
+ if not require_advertised_bandwidth:
+ return self.fallbacks[median_position]
+ # if we need advertised_bandwidth but this relay doesn't have it,
+ # move to a fallback with greater consensus weight until we find one
+ while not self.fallbacks[median_position]._data['advertised_bandwidth']:
+ median_position += 1
+ if median_position >= len(self.fallbacks):
+ return None
+ return self.fallbacks[median_position]
+ else:
+ return None
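+
+  # For example, with 4 fallbacks, (4 - 1) / 2 is 1 in integer division,
+  # so fallback_median returns the second-lowest entry (the low-median)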
+
+ # the maximum fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ def fallback_max(self):
+ if len(self.fallbacks) > 0:
+ return self.fallbacks[0]
+ else:
+ return None
+
+ # does exclusion_list contain attribute?
+ # if so, return False
+ # if not, return True
+ # if attribute is None or the empty string, always return True
+ @staticmethod
+ def allow(attribute, exclusion_list):
+ if attribute is None or attribute == '':
+ return True
+ elif attribute in exclusion_list:
+ return False
+ else:
+ return True
+
+ # make sure there is only one fallback per IPv4 address, and per IPv6 address
+ # there is only one IPv4 address on each fallback: the IPv4 DirPort address
+ # (we choose the IPv4 ORPort which is on the same IPv4 as the DirPort)
+ # there is at most one IPv6 address on each fallback: the IPv6 ORPort address
+ # we try to match the IPv4 ORPort, but will use any IPv6 address if needed
+ # (clients assume the IPv6 DirPort is the same as the IPv4 DirPort, but
+ # typically only use the IPv6 ORPort)
+ # if there is no IPv6 address, only the IPv4 address is checked
+ # return the number of candidates we excluded
+ def limit_fallbacks_same_ip(self):
+ ip_limit_fallbacks = []
+ ip_list = []
+ for f in self.fallbacks:
+ if (CandidateList.allow(f.dirip, ip_list)
+ and CandidateList.allow(f.ipv6addr, ip_list)):
+ ip_limit_fallbacks.append(f)
+ ip_list.append(f.dirip)
+ if f.has_ipv6():
+ ip_list.append(f.ipv6addr)
+ elif not CandidateList.allow(f.dirip, ip_list):
+ logging.info('Eliminated %s: already have fallback on IPv4 %s'%(
+ f._fpr, f.dirip))
+ elif f.has_ipv6() and not CandidateList.allow(f.ipv6addr, ip_list):
+ logging.info('Eliminated %s: already have fallback on IPv6 %s'%(
+ f._fpr, f.ipv6addr))
+ original_count = len(self.fallbacks)
+ self.fallbacks = ip_limit_fallbacks
+ return original_count - len(self.fallbacks)
+
+ # make sure there is only one fallback per ContactInfo
+ # if there is no ContactInfo, allow the fallback
+ # this check can be gamed by providing no ContactInfo, or by setting the
+ # ContactInfo to match another fallback
+ # However, given the likelihood that relays with the same ContactInfo will
+ # go down at similar times, its usefulness outweighs the risk
+ def limit_fallbacks_same_contact(self):
+ contact_limit_fallbacks = []
+ contact_list = []
+ for f in self.fallbacks:
+ if CandidateList.allow(f._data['contact'], contact_list):
+ contact_limit_fallbacks.append(f)
+ contact_list.append(f._data['contact'])
+ else:
+ logging.info(('Eliminated %s: already have fallback on ' +
+ 'ContactInfo %s')%(f._fpr, f._data['contact']))
+ original_count = len(self.fallbacks)
+ self.fallbacks = contact_limit_fallbacks
+ return original_count - len(self.fallbacks)
+
+ # make sure there is only one fallback per effective family
+ # if there is no family, allow the fallback
+ # this check can't be gamed, because we use effective family, which ensures
+ # mutual family declarations
+ # if any indirect families exist, the result depends on the order in which
+ # fallbacks are sorted in the list
+ def limit_fallbacks_same_family(self):
+ family_limit_fallbacks = []
+ fingerprint_list = []
+ for f in self.fallbacks:
+ if CandidateList.allow(f._fpr, fingerprint_list):
+ family_limit_fallbacks.append(f)
+ fingerprint_list.append(f._fpr)
+ fingerprint_list.extend(f._data['effective_family'])
+ else:
+ # technically, we already have a fallback with this fallback in its
+ # effective family
+        logging.info('Eliminated %s: already have fallback in effective ' +
+                     'family', f._fpr)
+ original_count = len(self.fallbacks)
+ self.fallbacks = family_limit_fallbacks
+ return original_count - len(self.fallbacks)
+
+ # try a download check on each fallback candidate in order
+ # stop after max_count successful downloads
+ # but don't remove any candidates from the array
+ def try_download_consensus_checks(self, max_count):
+ dl_ok_count = 0
+ for f in self.fallbacks:
+ f.try_fallback_download_consensus()
+ if f.get_fallback_download_consensus():
+ # this fallback downloaded a consensus ok
+ dl_ok_count += 1
+ if dl_ok_count >= max_count:
+ # we have enough fallbacks
+ return
+
+ # put max_count successful candidates in the fallbacks array:
+ # - perform download checks on each fallback candidate
+ # - retry failed candidates if CONSENSUS_DOWNLOAD_RETRY is set
+ # - eliminate failed candidates
+ # - if there are more than max_count candidates, eliminate lowest bandwidth
+ # - if there are fewer than max_count candidates, leave only successful
+ # Return the number of fallbacks that failed the consensus check
+ def perform_download_consensus_checks(self, max_count):
+ self.sort_fallbacks_by_measured_bandwidth()
+ self.try_download_consensus_checks(max_count)
+ if CONSENSUS_DOWNLOAD_RETRY:
+ # try unsuccessful candidates again
+ # we could end up with more than max_count successful candidates here
+ self.try_download_consensus_checks(max_count)
+ # now we have at least max_count successful candidates,
+ # or we've tried them all
+ original_count = len(self.fallbacks)
+ self.fallbacks = filter(lambda x: x.get_fallback_download_consensus(),
+ self.fallbacks)
+ # some of these failed the check; others skipped the check because we
+ # already had enough successful downloads
+ failed_count = original_count - len(self.fallbacks)
+ self.fallbacks = self.fallbacks[:max_count]
+ return failed_count
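+
+ # Worked example (illustrative numbers, CONSENSUS_DOWNLOAD_RETRY off):
+ # with 150 sorted candidates and max_count = 100, checks stop once 100
+ # succeed; if that takes 110 attempts (10 failures), the 40 untried
+ # candidates are filtered out as 'skipped', and the function returns
+ # 150 - 100 = 50 (failures plus skips)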
+
+ # return a string that describes a/b as a percentage
+ @staticmethod
+ def describe_percentage(a, b):
+ if b != 0:
+ return '%d/%d = %.0f%%'%(a, b, (a*100.0)/b)
+ else:
+ # technically, 0/0 is undefined, but 0.0% is a sensible result
+ return '%d/%d = %.0f%%'%(a, b, 0.0)
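+
+ # e.g. describe_percentage(50, 200) returns '50/200 = 25%'
+ #      describe_percentage(0, 0)    returns '0/0 = 0%'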
+
+ # return a dictionary of lists of fallbacks by IPv4 netblock
+ # the dictionary is keyed by the fingerprint of an arbitrary fallback
+ # in each netblock
+ # mask_bits is the size of the netblock
+ def fallbacks_by_ipv4_netblock(self, mask_bits):
+ netblocks = {}
+ for f in self.fallbacks:
+ found_netblock = False
+ for b in netblocks.keys():
+ # we found an existing netblock containing this fallback
+ if f.ipv4_netblocks_equal(self[b], mask_bits):
+ # add it to the list
+ netblocks[b].append(f)
+ found_netblock = True
+ break
+ # make a new netblock based on this fallback's fingerprint
+ if not found_netblock:
+ netblocks[f._fpr] = [f]
+ return netblocks
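+
+ # Illustrative grouping with mask_bits = 16 (documentation addresses):
+ # 198.51.100.1 and 198.51.100.2 fall in the same /16, so they share one
+ # entry, keyed by the fingerprint of whichever fallback created it;
+ # 203.0.113.1 gets an entry of its own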
+
+ # return a dictionary of lists of fallbacks by IPv6 netblock
+ # where mask_bits is the size of the netblock
+ def fallbacks_by_ipv6_netblock(self, mask_bits):
+ netblocks = {}
+ for f in self.fallbacks:
+ # skip fallbacks without IPv6 addresses
+ if not f.has_ipv6():
+ continue
+ found_netblock = False
+ for b in netblocks.keys():
+ # we found an existing netblock containing this fallback
+ if f.ipv6_netblocks_equal(self[b], mask_bits):
+ # add it to the list
+ netblocks[b].append(f)
+ found_netblock = True
+ break
+ # make a new netblock based on this fallback's fingerprint
+ if not found_netblock:
+ netblocks[f._fpr] = [f]
+ return netblocks
+
+ # log a message about the proportion of fallbacks in each IPv4 netblock,
+ # where mask_bits is the size of the netblock
+ def describe_fallback_ipv4_netblock_mask(self, mask_bits):
+ fallback_count = len(self.fallbacks)
+ shared_netblock_fallback_count = 0
+ most_frequent_netblock = None
+ netblocks = self.fallbacks_by_ipv4_netblock(mask_bits)
+ for b in netblocks.keys():
+ if len(netblocks[b]) > 1:
+ # how many fallbacks are in a netblock with other fallbacks?
+ shared_netblock_fallback_count += len(netblocks[b])
+ # what's the netblock with the most fallbacks?
+ if (most_frequent_netblock is None
+ or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+ most_frequent_netblock = b
+ logging.debug('Fallback IPv4 addresses in the same /%d:'%(mask_bits))
+ for f in netblocks[b]:
+ logging.debug('%s - %s', f.dirip, f._fpr)
+ if most_frequent_netblock is not None:
+ logging.warning('There are %s fallbacks in the IPv4 /%d containing %s'%(
+ CandidateList.describe_percentage(
+ len(netblocks[most_frequent_netblock]),
+ fallback_count),
+ mask_bits,
+ self[most_frequent_netblock].dirip))
+ if shared_netblock_fallback_count > 0:
+ logging.warning(('%s of fallbacks are in an IPv4 /%d with other ' +
+ 'fallbacks')%(CandidateList.describe_percentage(
+ shared_netblock_fallback_count,
+ fallback_count),
+ mask_bits))
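+
+ # Sample output (illustrative values):
+ #   There are 2/100 = 2% fallbacks in the IPv4 /16 containing 198.51.100.1
+ #   2/100 = 2% of fallbacks are in an IPv4 /16 with other fallbacks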
+
+ # log a message about the proportion of fallbacks in each IPv6 netblock,
+ # where mask_bits is the size of the netblock
+ def describe_fallback_ipv6_netblock_mask(self, mask_bits):
+ fallback_count = len(self.fallbacks_with_ipv6())
+ shared_netblock_fallback_count = 0
+ most_frequent_netblock = None
+ netblocks = self.fallbacks_by_ipv6_netblock(mask_bits)
+ for b in netblocks.keys():
+ if len(netblocks[b]) > 1:
+ # how many fallbacks are in a netblock with other fallbacks?
+ shared_netblock_fallback_count += len(netblocks[b])
+ # what's the netblock with the most fallbacks?
+ if (most_frequent_netblock is None
+ or len(netblocks[b]) > len(netblocks[most_frequent_netblock])):
+ most_frequent_netblock = b
+ logging.debug('Fallback IPv6 addresses in the same /%d:'%(mask_bits))
+ for f in netblocks[b]:
+ logging.debug('%s - %s', f.ipv6addr, f._fpr)
+ if most_frequent_netblock is not None:
+ logging.warning('There are %s fallbacks in the IPv6 /%d containing %s'%(
+ CandidateList.describe_percentage(
+ len(netblocks[most_frequent_netblock]),
+ fallback_count),
+ mask_bits,
+ self[most_frequent_netblock].ipv6addr))
+ if shared_netblock_fallback_count > 0:
+ logging.warning(('%s of fallbacks are in an IPv6 /%d with other ' +
+ 'fallbacks')%(CandidateList.describe_percentage(
+ shared_netblock_fallback_count,
+ fallback_count),
+ mask_bits))
+
+ # log a message about the proportion of fallbacks in each IPv4 /8, /16,
+ # and /24
+ def describe_fallback_ipv4_netblocks(self):
+ # this doesn't actually tell us anything useful
+ #self.describe_fallback_ipv4_netblock_mask(8)
+ self.describe_fallback_ipv4_netblock_mask(16)
+ self.describe_fallback_ipv4_netblock_mask(24)
+
+ # log a message about the proportion of fallbacks in each IPv6 /12 (RIR),
+ # /23 (smaller RIR blocks), /32 (LIR), /48 (Customer), and /64 (Host)
+ # https://www.iana.org/assignments/ipv6-unicast-address-assignments/
+ def describe_fallback_ipv6_netblocks(self):
+ # these don't actually tell us anything useful
+ #self.describe_fallback_ipv6_netblock_mask(12)
+ #self.describe_fallback_ipv6_netblock_mask(23)
+ self.describe_fallback_ipv6_netblock_mask(32)
+ self.describe_fallback_ipv6_netblock_mask(48)
+ self.describe_fallback_ipv6_netblock_mask(64)
+
+ # log a message about the proportion of fallbacks in each IPv4 and IPv6
+ # netblock
+ def describe_fallback_netblocks(self):
+ self.describe_fallback_ipv4_netblocks()
+ self.describe_fallback_ipv6_netblocks()
+
+ # return a list of fallbacks whose IPv4 ORPort is port
+ def fallbacks_on_ipv4_orport(self, port):
+ return filter(lambda x: x.orport == port, self.fallbacks)
+
+ # return a list of fallbacks whose IPv6 ORPort is port
+ def fallbacks_on_ipv6_orport(self, port):
+ return filter(lambda x: x.ipv6orport == port, self.fallbacks_with_ipv6())
+
+ # return a list of fallbacks whose DirPort is port
+ def fallbacks_on_dirport(self, port):
+ return filter(lambda x: x.dirport == port, self.fallbacks)
+
+ # log a message about the proportion of fallbacks on IPv4 ORPort port
+ # and return that count
+ def describe_fallback_ipv4_orport(self, port):
+ port_count = len(self.fallbacks_on_ipv4_orport(port))
+ fallback_count = len(self.fallbacks)
+ logging.warning('%s of fallbacks are on IPv4 ORPort %d'%(
+ CandidateList.describe_percentage(port_count,
+ fallback_count),
+ port))
+ return port_count
+
+ # log a message about the proportion of IPv6 fallbacks on IPv6 ORPort port
+ # and return that count
+ def describe_fallback_ipv6_orport(self, port):
+ port_count = len(self.fallbacks_on_ipv6_orport(port))
+ fallback_count = len(self.fallbacks_with_ipv6())
+ logging.warning('%s of IPv6 fallbacks are on IPv6 ORPort %d'%(
+ CandidateList.describe_percentage(port_count,
+ fallback_count),
+ port))
+ return port_count
+
+ # log a message about the proportion of fallbacks on DirPort port
+ # and return that count
+ def describe_fallback_dirport(self, port):
+ port_count = len(self.fallbacks_on_dirport(port))
+ fallback_count = len(self.fallbacks)
+ logging.warning('%s of fallbacks are on DirPort %d'%(
+ CandidateList.describe_percentage(port_count,
+ fallback_count),
+ port))
+ return port_count
+
+ # log a message about the proportion of fallbacks on each DirPort,
+ # each IPv4 ORPort, and each IPv6 ORPort
+ def describe_fallback_ports(self):
+ fallback_count = len(self.fallbacks)
+ ipv4_or_count = fallback_count
+ ipv4_or_count -= self.describe_fallback_ipv4_orport(443)
+ ipv4_or_count -= self.describe_fallback_ipv4_orport(9001)
+ logging.warning('%s of fallbacks are on other IPv4 ORPorts'%(
+ CandidateList.describe_percentage(ipv4_or_count,
+ fallback_count)))
+ ipv6_fallback_count = len(self.fallbacks_with_ipv6())
+ ipv6_or_count = ipv6_fallback_count
+ ipv6_or_count -= self.describe_fallback_ipv6_orport(443)
+ ipv6_or_count -= self.describe_fallback_ipv6_orport(9001)
+ logging.warning('%s of IPv6 fallbacks are on other IPv6 ORPorts'%(
+ CandidateList.describe_percentage(ipv6_or_count,
+ ipv6_fallback_count)))
+ dir_count = fallback_count
+ dir_count -= self.describe_fallback_dirport(80)
+ dir_count -= self.describe_fallback_dirport(9030)
+ logging.warning('%s of fallbacks are on other DirPorts'%(
+ CandidateList.describe_percentage(dir_count,
+ fallback_count)))
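+
+ # 9001 and 9030 are tor's default ORPort and DirPort; 443 and 80 are the
+ # common HTTPS and HTTP alternatives, which are more likely to be
+ # reachable through restrictive firewalls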
+
+ # return a list of fallbacks which have the Exit flag
+ def fallbacks_with_exit(self):
+ return filter(lambda x: x.is_exit(), self.fallbacks)
+
+ # log a message about the proportion of fallbacks with an Exit flag
+ def describe_fallback_exit_flag(self):
+ exit_fallback_count = len(self.fallbacks_with_exit())
+ fallback_count = len(self.fallbacks)
+ logging.warning('%s of fallbacks have the Exit flag'%(
+ CandidateList.describe_percentage(exit_fallback_count,
+ fallback_count)))
+
+ # return a list of fallbacks which have an IPv6 address
+ def fallbacks_with_ipv6(self):
+ return filter(lambda x: x.has_ipv6(), self.fallbacks)
+
+ # log a message about the proportion of fallbacks on IPv6
+ def describe_fallback_ip_family(self):
+ ipv6_fallback_count = len(self.fallbacks_with_ipv6())
+ fallback_count = len(self.fallbacks)
+ logging.warning('%s of fallbacks are on IPv6'%(
+ CandidateList.describe_percentage(ipv6_fallback_count,
+ fallback_count)))
+
+ def summarise_fallbacks(self, eligible_count, operator_count, failed_count,
+ guard_count, target_count):
+ s = ''
+ s += '/* To comment out entries in this file, use C comments, and add *'
+ s += ' to the start of each line. (stem finds fallback entries using "'
+ s += ' at the start of a line.) */'
+ s += '\n'
+ # Report:
+ # whether we checked consensus download times
+ # the number of fallback directories (and limits/exclusions, if relevant)
+ # min & max fallback bandwidths
+ # #error if below minimum count
+ if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
+ s += '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
+ 'IPv4' if PERFORM_IPV4_DIRPORT_CHECKS else '',
+ ' and ' if (PERFORM_IPV4_DIRPORT_CHECKS
+ and PERFORM_IPV6_DIRPORT_CHECKS) else '',
+ 'IPv6' if PERFORM_IPV6_DIRPORT_CHECKS else '',
+ CONSENSUS_DOWNLOAD_SPEED_MAX)
+ else:
+ s += '/* Did not check IPv4 or IPv6 DirPort consensus downloads. */'
+ s += '\n'
+ # Multiline C comment with #error if things go bad
+ s += '/*'
+ s += '\n'
+ # Integers don't need escaping in C comments
+ fallback_count = len(self.fallbacks)
+ if FALLBACK_PROPORTION_OF_GUARDS is None:
+ fallback_proportion = ''
+ else:
+ fallback_proportion = ', Target %d (%d * %.2f)'%(target_count,
+ guard_count,
+ FALLBACK_PROPORTION_OF_GUARDS)
+ s += 'Final Count: %d (Eligible %d%s'%(fallback_count, eligible_count,
+ fallback_proportion)
+ if MAX_FALLBACK_COUNT is not None:
+ s += ', Max %d'%(MAX_FALLBACK_COUNT)
+ s += ')\n'
+ if eligible_count != fallback_count:
+ removed_count = eligible_count - fallback_count
+ excess_to_target_or_max = (eligible_count - operator_count - failed_count
+ - fallback_count)
+ # some candidates 'Failed' the check; others 'Skipped' it because we
+ # already had enough successful downloads
+ s += ('Excluded: %d (Same Operator %d, Failed/Skipped Download %d, ' +
+ 'Excess %d)')%(removed_count, operator_count, failed_count,
+ excess_to_target_or_max)
+ s += '\n'
+ min_fb = self.fallback_min()
+ min_bw = min_fb._data['measured_bandwidth']
+ max_fb = self.fallback_max()
+ max_bw = max_fb._data['measured_bandwidth']
+ s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0),
+ max_bw/(1024.0*1024.0))
+ s += '\n'
+ s += '*/'
+ if fallback_count < MIN_FALLBACK_COUNT:
+ # We must have a minimum number of fallbacks so they are always
+ # reachable, and are in diverse locations
+ s += '\n'
+ s += '#error Fallback Count %d is too low. '%(fallback_count)
+ s += 'Must be at least %d for diversity. '%(MIN_FALLBACK_COUNT)
+ s += 'Try adding entries to the whitelist, '
+ s += 'or setting INCLUDE_UNLISTED_ENTRIES = True.'
+ return s
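+
+ # Sample summary (illustrative values only):
+ #   /* Checked IPv4 DirPorts served a consensus within 15.0s. */
+ #   /*
+ #   Final Count: 100 (Eligible 500, Target 200 (2000 * 0.10), Max 500)
+ #   Excluded: 400 (Same Operator 150, Failed/Skipped Download 200, Excess 50)
+ #   Bandwidth Range: 1.0 - 100.0 MB/s
+ #   */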
+
+## Main Function
+
+def list_fallbacks():
+ """ Fetches required onionoo documents and evaluates the
+ fallback directory criteria for each of the relays """
+
+ logging.warning('Downloading and parsing Onionoo data. ' +
+ 'This may take some time.')
+ # find relays that could be fallbacks
+ candidates = CandidateList()
+ candidates.add_relays()
+
+ # work out how many fallbacks we want
+ guard_count = candidates.count_guards()
+ if FALLBACK_PROPORTION_OF_GUARDS is None:
+ target_count = guard_count
+ else:
+ target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS)
+ # the maximum number of fallbacks is the least of:
+ # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
+ # - the maximum fallback count (MAX_FALLBACK_COUNT)
+ if MAX_FALLBACK_COUNT is None:
+ max_count = target_count
+ else:
+ max_count = min(target_count, MAX_FALLBACK_COUNT)
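+
+ # e.g. (illustrative): guard_count = 2000 with
+ # FALLBACK_PROPORTION_OF_GUARDS = 0.1 gives target_count = 200;
+ # if MAX_FALLBACK_COUNT is 500, max_count = min(200, 500) = 200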
+
+ candidates.compute_fallbacks()
+ prefilter_fallbacks = copy.copy(candidates.fallbacks)
+
+ # filter with the whitelist and blacklist
+ # if a relay has changed IPv4 address or ports recently, it will be excluded
+ # as ineligible before we call apply_filter_lists, and so there will be no
+ # warning that the details have changed from those in the whitelist.
+ # instead, there will be an info-level log during the eligibility check.
+ initial_count = len(candidates.fallbacks)
+ excluded_count = candidates.apply_filter_lists()
+ print candidates.summarise_filters(initial_count, excluded_count)
+ eligible_count = len(candidates.fallbacks)
+
+ # calculate the measured bandwidth of each relay,
+ # then remove low-bandwidth relays
+ candidates.calculate_measured_bandwidth()
+ candidates.remove_low_bandwidth_relays()
+
+ # print the raw fallback list
+ #for x in candidates.fallbacks:
+ # print x.fallbackdir_line(True)
+ # print json.dumps(candidates[x]._data, sort_keys=True, indent=4,
+ # separators=(',', ': '), default=json_util.default)
+
+ # impose mandatory conditions here, like one per contact, family, IP
+ # in measured bandwidth order
+ candidates.sort_fallbacks_by_measured_bandwidth()
+ operator_count = 0
+ # only impose these limits on the final list - operators can nominate
+ # multiple candidate fallbacks, and then we choose the best set
+ if not OUTPUT_CANDIDATES:
+ operator_count += candidates.limit_fallbacks_same_ip()
+ operator_count += candidates.limit_fallbacks_same_contact()
+ operator_count += candidates.limit_fallbacks_same_family()
+
+ # check if each candidate can serve a consensus
+ # there's a small risk we've eliminated relays from the same operator that
+ # can serve a consensus, in favour of one that can't
+ # but given it takes up to 15 seconds to check each consensus download,
+ # the risk is worth it
+ if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
+ logging.warning('Checking consensus download speeds. ' +
+ 'This may take some time.')
+ failed_count = candidates.perform_download_consensus_checks(max_count)
+
+ # analyse and log interesting diversity metrics
+ # like netblock, ports, exit, IPv4-only
+ # (we can't easily analyse AS, and it's hard to accurately analyse country)
+ candidates.describe_fallback_ip_family()
+ # if we can't import the ipaddress module, we can't do netblock analysis
+ if HAVE_IPADDRESS:
+ candidates.describe_fallback_netblocks()
+ candidates.describe_fallback_ports()
+ candidates.describe_fallback_exit_flag()
+
+ # output C comments summarising the fallback selection process
+ if len(candidates.fallbacks) > 0:
+ print candidates.summarise_fallbacks(eligible_count, operator_count,
+ failed_count, guard_count,
+ target_count)
+ else:
+ print '/* No Fallbacks met criteria */'
+
+ # output C comments specifying the Onionoo data used to create the list
+ for s in fetch_source_list():
+ print describe_fetch_source(s)
+
+ # if we're outputting the final fallback list, sort by fingerprint
+ # this makes diffs much more stable
+ # otherwise, leave sorted by bandwidth, which allows operators to be
+ # contacted in priority order
+ if not OUTPUT_CANDIDATES:
+ candidates.sort_fallbacks_by_fingerprint()
+
+ for x in candidates.fallbacks:
+ print x.fallbackdir_line(candidates.fallbacks, prefilter_fallbacks)
+
+if __name__ == "__main__":
+ list_fallbacks()