summaryrefslogtreecommitdiff
path: root/scripts/maint/updateFallbackDirs.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-xscripts/maint/updateFallbackDirs.py208
1 files changed, 156 insertions, 52 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 312049608c..44a7318fc8 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -112,17 +112,13 @@ CUTOFF_GUARD = .95
# .00 means no bad exits
PERMITTED_BADEXIT = .00
-# Clients will time out after 30 seconds trying to download a consensus
-# So allow fallback directories half that to deliver a consensus
-# The exact download times might change based on the network connection
-# running this script, but only by a few seconds
-# There is also about a second of python overhead
-CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
-# If the relay fails a consensus check, retry the download
-# This avoids delisting a relay due to transient network conditions
-CONSENSUS_DOWNLOAD_RETRY = True
+# older entries' weights are adjusted with ALPHA^(age in days)
+AGE_ALPHA = 0.99
+
+# this factor is used to scale OnionOO entries to [0,1]
+ONIONOO_SCALE_ONE = 999.
-## List Length Limits
+## Fallback Count Limits
# The target for these parameters is 20% of the guards in the network
# This is around 200 as of October 2015
@@ -130,37 +126,53 @@ _FB_POG = 0.2
FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG
# We want exactly 100 fallbacks for the initial release
-# Limit the number of fallbacks (eliminating lowest by weight)
+# This gives us scope to add extra fallbacks to the list as needed
+# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 100
# Emit a C #error if the number of fallbacks is below
MIN_FALLBACK_COUNT = 100
-## Fallback Weight Settings
+## Fallback Bandwidth Requirements
-# Any fallback with the Exit flag has its consensus weight multipled by this
-EXIT_WEIGHT_FRACTION = 1.0
+# Any fallback with the Exit flag has its bandwidth multiplied by this fraction
+# to make sure we aren't further overloading exits
+# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
+# and the extra load really isn't that much for large relays.)
+EXIT_BANDWIDTH_FRACTION = 1.0
-# If a single fallback's consensus weight is too low, it's pointless adding it
+# If a single fallback's bandwidth is too low, it's pointless adding it
# We expect fallbacks to handle an extra 30 kilobytes per second of traffic
-# Make sure they support a hundred times that
-MIN_CONSENSUS_WEIGHT = 30.0 * 100.0
+# Make sure they can support a hundred times the expected extra load
+# (Use 102.4 to make it come out nicely in MB/s)
+# We estimate each relay's bandwidth from its consensus weight before applying
+# the filter, because advertised bandwidth amounts are specified by the relay
+MIN_BANDWIDTH = 102.4 * 30.0 * 1024.0
+
+# Clients will time out after 30 seconds trying to download a consensus
+# So allow fallback directories half that to deliver a consensus
+# The exact download times might change based on the network connection
+# running this script, but only by a few seconds
+# There is also about a second of python overhead
+CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
+# If the relay fails a consensus check, retry the download
+# This avoids delisting a relay due to transient network conditions
+CONSENSUS_DOWNLOAD_RETRY = True
+
+## Fallback Weights for Client Selection
# All fallback weights are equal, and set to the value below
# Authorities are weighted 1.0 by default
# Clients use these weights to select fallbacks and authorities at random
# If there are 100 fallbacks and 9 authorities:
-# - each fallback is chosen with probability 10/(1000 + 9) ~= 0.99%
-# - each authority is chosen with probability 1/(1000 + 9) ~= 0.09%
+# - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99%
+# - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.10%
+# A client choosing a bootstrap directory server will choose a fallback for
+# 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for
+# 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts.
+# (This disregards the bootstrap schedules, where clients start by choosing
+# from fallbacks & authorities, then later choose from only authorities.)
FALLBACK_OUTPUT_WEIGHT = 10.0
-## Other Configuration Parameters
-
-# older entries' weights are adjusted with ALPHA^(age in days)
-AGE_ALPHA = 0.99
-
-# this factor is used to scale OnionOO entries to [0,1]
-ONIONOO_SCALE_ONE = 999.
-
## Parsing Functions
def parse_ts(t):
@@ -448,6 +460,11 @@ class Candidate(object):
details['contact'] = None
if not 'flags' in details or details['flags'] is None:
details['flags'] = []
+ if (not 'advertised_bandwidth' in details
+ or details['advertised_bandwidth'] is None):
+ # relays without advertised bandwidth have it calculated from their
+ # consensus weight
+ details['advertised_bandwidth'] = 0
details['last_changed_address_or_port'] = parse_ts(
details['last_changed_address_or_port'])
self._data = details
@@ -462,10 +479,6 @@ class Candidate(object):
self._compute_ipv6addr()
if self.ipv6addr is None:
logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
- # Reduce the weight of exits to EXIT_WEIGHT_FRACTION * consensus_weight
- if self.is_exit():
- exit_weight = self._data['consensus_weight'] * EXIT_WEIGHT_FRACTION
- self._data['consensus_weight'] = exit_weight
def _stable_sort_or_addresses(self):
# replace self._data['or_addresses'] with a stable ordering,
@@ -754,11 +767,9 @@ class Candidate(object):
logging.info('%s not a candidate: guard avg too low (%lf)',
self._fpr, self._guard)
return False
- if (MIN_CONSENSUS_WEIGHT is not None
- and self._data['consensus_weight'] < MIN_CONSENSUS_WEIGHT):
- logging.info('%s not a candidate: consensus weight %.0f too low, must ' +
- 'be at least %.0f', self._fpr,
- self._data['consensus_weight'], MIN_CONSENSUS_WEIGHT)
+ if (not self._data.has_key('consensus_weight')
+ or self._data['consensus_weight'] < 1):
+ logging.info('%s not a candidate: consensus weight invalid', self._fpr)
return False
return True
@@ -889,6 +900,30 @@ class Candidate(object):
ipv6 if has_ipv6 else value)
return False
+ # return the ratio of this relay's advertised bandwidth to its consensus
+ # weight
+ def cw_to_bw_factor(self):
+ # any relays with a missing or zero consensus weight are not candidates
+ # any relays with a missing advertised bandwidth have it set to zero,
+ # so the factor for those relays is zero
+ # NOTE(review): if both values are ints, Python 2 floors this division -
+ # confirm that a truncated factor is intended
+ return self._data['advertised_bandwidth'] / self._data['consensus_weight']
+
+ # since advertised_bandwidth is reported by the relay, it can be gamed
+ # to avoid this, use the median consensus weight to bandwidth factor to
+ # estimate this relay's measured bandwidth, and make that the upper limit
+ # median_cw_to_bw_factor: the factor that consensus weight is multiplied by
+ # to estimate bandwidth (see cw_to_bw_factor)
+ # returns the smaller of the estimate and the advertised bandwidth, or the
+ # estimate alone when the advertised bandwidth is zero
+ def measured_bandwidth(self, median_cw_to_bw_factor):
+ cw_to_bw= median_cw_to_bw_factor
+ # Reduce exit bandwidth to make sure we're not overloading them
+ if self.is_exit():
+ cw_to_bw *= EXIT_BANDWIDTH_FRACTION
+ measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
+ if self._data['advertised_bandwidth'] != 0:
+ # limit advertised bandwidth (if available) to measured bandwidth
+ return min(measured_bandwidth, self._data['advertised_bandwidth'])
+ else:
+ # a zero advertised bandwidth means it was missing, so there is nothing
+ # to limit: use the estimate
+ return measured_bandwidth
+
+ # store the result of measured_bandwidth() in
+ # self._data['measured_bandwidth']
+ def set_measured_bandwidth(self, median_cw_to_bw_factor):
+ self._data['measured_bandwidth'] = self.measured_bandwidth(
+ median_cw_to_bw_factor)
+
def is_exit(self):
return 'Exit' in self._data['flags']
@@ -1056,8 +1091,8 @@ class CandidateList(dict):
logging.debug('Loading details document.')
d = fetch('details',
fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
- 'consensus_weight,or_addresses,dir_address,' +
- 'recommended_version,flags'))
+ 'consensus_weight,advertised_bandwidth,or_addresses,' +
+ 'dir_address,recommended_version,flags'))
logging.debug('Loading details document done.')
if not 'relays' in d: raise Exception("No relays found in document.")
@@ -1083,15 +1118,24 @@ class CandidateList(dict):
guard_count += 1
return guard_count
- # Find fallbacks that fit the uptime, stability, and flags criteria
+ # Find fallbacks that fit the uptime, stability, and flags criteria,
+ # and make an array of them in self.fallbacks
def compute_fallbacks(self):
self.fallbacks = map(lambda x: self[x],
- sorted(
- filter(lambda x: self[x].is_candidate(),
- self.keys()),
- key=lambda x: self[x]._data['consensus_weight'],
+ filter(lambda x: self[x].is_candidate(),
+ self.keys()))
+
+ # sort fallbacks by their consensus weight to advertised bandwidth factor,
+ # lowest to highest
+ # used to find the median cw_to_bw_factor()
+ # self.fallbacks holds Candidate objects, not fingerprints, so the sort key
+ # is called on each fallback directly rather than looked up in self
+ def sort_fallbacks_by_cw_to_bw_factor(self):
+   self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
+
+ # sort fallbacks by their measured bandwidth, highest to lowest
+ # call calculate_measured_bandwidth before calling this, so that every
+ # fallback has a 'measured_bandwidth' entry in its _data
+ # self.fallbacks holds Candidate objects, not fingerprints, so the sort key
+ # reads each fallback's own _data rather than looking it up in self
+ def sort_fallbacks_by_measured_bandwidth(self):
+   self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
reverse=True)
- )
@staticmethod
def load_relaylist(file_name):
@@ -1194,13 +1238,64 @@ class CandidateList(dict):
return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
excluded_count, initial_count)
- def fallback_min_weight(self):
+ # calculate each fallback's measured bandwidth based on the median
+ # consensus weight to advertised bandwidth ratio
+ # this re-sorts self.fallbacks by cw_to_bw_factor, lowest to highest
+ def calculate_measured_bandwidth(self):
+   self.sort_fallbacks_by_cw_to_bw_factor()
+   median_fallback = self.fallback_median(True)
+   if median_fallback is None:
+     # the list is empty, or no fallback from the median onwards has an
+     # advertised bandwidth, so there is no factor to estimate from
+     # a zero factor gives every fallback a measured bandwidth of zero,
+     # instead of failing on the missing median
+     logging.warning('No median fallback with an advertised bandwidth: ' +
+                     'treating all fallback bandwidths as zero')
+     median_cw_to_bw_factor = 0
+   else:
+     median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
+   for f in self.fallbacks:
+     f.set_measured_bandwidth(median_cw_to_bw_factor)
+
+ # remove relays with low measured bandwidth from the fallback list
+ # calculate_measured_bandwidth for each relay before calling this
+ # does nothing when MIN_BANDWIDTH is None
+ # the remaining fallbacks keep their existing order
+ def remove_low_bandwidth_relays(self):
+ if MIN_BANDWIDTH is None:
+ return
+ above_min_bw_fallbacks = []
+ for f in self.fallbacks:
+ if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
+ above_min_bw_fallbacks.append(f)
+ else:
+ # the bandwidth we log here is limited by the relay's consensus weight
+ # as well as its advertised bandwidth. See set_measured_bandwidth
+ # for details
+ logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' +
+ 'be at least %.1fMB/s', f._fpr,
+ f._data['measured_bandwidth']/(1024.0*1024.0),
+ MIN_BANDWIDTH/(1024.0*1024.0))
+ self.fallbacks = above_min_bw_fallbacks
+
+ # the minimum fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ # returns the last entry in self.fallbacks, or None if the list is empty
+ # the last entry is only the minimum when the list is sorted highest to
+ # lowest, as sort_fallbacks_by_measured_bandwidth does
+ def fallback_min(self):
if len(self.fallbacks) > 0:
return self.fallbacks[-1]
else:
return None
- def fallback_max_weight(self):
+ # the median fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ # if require_advertised_bandwidth is true, skip forward from the median to
+ # the first fallback that has a non-zero advertised bandwidth
+ # returns None if the list is empty, or if no such fallback is found
+ def fallback_median(self, require_advertised_bandwidth):
+   # use the low-median when there are an even number of fallbacks,
+   # for consistency with the bandwidth authorities
+   if len(self.fallbacks) > 0:
+     # floor division keeps the position an integer list index
+     median_position = (len(self.fallbacks) - 1) // 2
+     if not require_advertised_bandwidth:
+       return self.fallbacks[median_position]
+     # if we need advertised_bandwidth but this relay doesn't have it,
+     # move to later fallbacks in the list until we find one that does
+     while not self.fallbacks[median_position]._data['advertised_bandwidth']:
+       median_position += 1
+       if median_position >= len(self.fallbacks):
+         return None
+     return self.fallbacks[median_position]
+   else:
+     return None
+
+ # the maximum fallback in the list
+ # call one of the sort_fallbacks_* functions before calling this
+ def fallback_max(self):
if len(self.fallbacks) > 0:
return self.fallbacks[0]
else:
@@ -1211,7 +1306,7 @@ class CandidateList(dict):
# Report:
# whether we checked consensus download times
# the number of fallback directories (and limits/exclusions, if relevant)
- # min & max fallback weights
+ # min & max fallback bandwidths
# #error if below minimum count
if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
s = '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
@@ -1243,11 +1338,12 @@ class CandidateList(dict):
s += 'Excluded: %d (Eligible Count Exceeded Target Count)'%(
eligible_count - fallback_count)
s += '\n'
- min_fb = self.fallback_min_weight()
- min_weight = min_fb._data['consensus_weight']
- max_fb = self.fallback_max_weight()
- max_weight = max_fb._data['consensus_weight']
- s += 'Consensus Weight Range: %d - %d'%(min_weight, max_weight)
+ min_fb = self.fallback_min()
+ min_bw = min_fb._data['measured_bandwidth']
+ max_fb = self.fallback_max()
+ max_bw = max_fb._data['measured_bandwidth']
+ s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0),
+ max_bw/(1024.0*1024.0))
s += '\n'
s += '*/'
if fallback_count < MIN_FALLBACK_COUNT:
@@ -1293,6 +1389,14 @@ def list_fallbacks():
print candidates.summarise_filters(initial_count, excluded_count)
eligible_count = len(candidates.fallbacks)
+ # calculate the measured bandwidth of each relay,
+ # then remove low-bandwidth relays
+ candidates.calculate_measured_bandwidth()
+ candidates.remove_low_bandwidth_relays()
+ # make sure the list is sorted by bandwidth when we output it
+ # so that we include the active fallbacks with the greatest bandwidth
+ candidates.sort_fallbacks_by_measured_bandwidth()
+
# print the raw fallback list
#for x in candidates.fallbacks:
# print x.fallbackdir_line(True)