diff options
Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-x | scripts/maint/updateFallbackDirs.py | 208 |
1 files changed, 156 insertions, 52 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 312049608c..44a7318fc8 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -112,17 +112,13 @@ CUTOFF_GUARD = .95 # .00 means no bad exits PERMITTED_BADEXIT = .00 -# Clients will time out after 30 seconds trying to download a consensus -# So allow fallback directories half that to deliver a consensus -# The exact download times might change based on the network connection -# running this script, but only by a few seconds -# There is also about a second of python overhead -CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0 -# If the relay fails a consensus check, retry the download -# This avoids delisting a relay due to transient network conditions -CONSENSUS_DOWNLOAD_RETRY = True +# older entries' weights are adjusted with ALPHA^(age in days) +AGE_ALPHA = 0.99 + +# this factor is used to scale OnionOO entries to [0,1] +ONIONOO_SCALE_ONE = 999. -## List Length Limits +## Fallback Count Limits # The target for these parameters is 20% of the guards in the network # This is around 200 as of October 2015 @@ -130,37 +126,53 @@ _FB_POG = 0.2 FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG # We want exactly 100 fallbacks for the initial release -# Limit the number of fallbacks (eliminating lowest by weight) +# This gives us scope to add extra fallbacks to the list as needed +# Limit the number of fallbacks (eliminating lowest by advertised bandwidth) MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 100 # Emit a C #error if the number of fallbacks is below MIN_FALLBACK_COUNT = 100 -## Fallback Weight Settings +## Fallback Bandwidth Requirements -# Any fallback with the Exit flag has its consensus weight multipled by this -EXIT_WEIGHT_FRACTION = 1.0 +# Any fallback with the Exit flag has its bandwidth multipled by this fraction +# to make sure we aren't further overloading exits +# (Set to 1.0, because we asked that only lightly loaded exits opt-in, +# and the extra load really isn't that much for large relays.) +EXIT_BANDWIDTH_FRACTION = 1.0 -# If a single fallback's consensus weight is too low, it's pointless adding it +# If a single fallback's bandwidth is too low, it's pointless adding it # We expect fallbacks to handle an extra 30 kilobytes per second of traffic -# Make sure they support a hundred times that -MIN_CONSENSUS_WEIGHT = 30.0 * 100.0 +# Make sure they can support a hundred times the expected extra load +# (Use 102.4 to make it come out nicely in MB/s) +# We convert this to a consensus weight before applying the filter, +# because all the bandwidth amounts are specified by the relay +MIN_BANDWIDTH = 102.4 * 30.0 * 1024.0 + +# Clients will time out after 30 seconds trying to download a consensus +# So allow fallback directories half that to deliver a consensus +# The exact download times might change based on the network connection +# running this script, but only by a few seconds +# There is also about a second of python overhead +CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0 +# If the relay fails a consensus check, retry the download +# This avoids delisting a relay due to transient network conditions +CONSENSUS_DOWNLOAD_RETRY = True + +## Fallback Weights for Client Selection # All fallback weights are equal, and set to the value below # Authorities are weighted 1.0 by default # Clients use these weights to select fallbacks and authorities at random # If there are 100 fallbacks and 9 authorities: -# - each fallback is chosen with probability 10/(1000 + 9) ~= 0.99% -# - each authority is chosen with probability 1/(1000 + 9) ~= 0.09% +# - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99% +# - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.09% +# A client choosing a bootstrap directory server will choose a fallback for +# 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for +# 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts. +# (This disregards the bootstrap schedules, where clients start by choosing +# from fallbacks & authoritites, then later choose from only authorities.) FALLBACK_OUTPUT_WEIGHT = 10.0 -## Other Configuration Parameters - -# older entries' weights are adjusted with ALPHA^(age in days) -AGE_ALPHA = 0.99 - -# this factor is used to scale OnionOO entries to [0,1] -ONIONOO_SCALE_ONE = 999. - ## Parsing Functions def parse_ts(t): @@ -448,6 +460,11 @@ class Candidate(object): details['contact'] = None if not 'flags' in details or details['flags'] is None: details['flags'] = [] + if (not 'advertised_bandwidth' in details + or details['advertised_bandwidth'] is None): + # relays without advertised bandwdith have it calculated from their + # consensus weight + details['advertised_bandwidth'] = 0 details['last_changed_address_or_port'] = parse_ts( details['last_changed_address_or_port']) self._data = details @@ -462,10 +479,6 @@ class Candidate(object): self._compute_ipv6addr() if self.ipv6addr is None: logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,)) - # Reduce the weight of exits to EXIT_WEIGHT_FRACTION * consensus_weight - if self.is_exit(): - exit_weight = self._data['consensus_weight'] * EXIT_WEIGHT_FRACTION - self._data['consensus_weight'] = exit_weight def _stable_sort_or_addresses(self): # replace self._data['or_addresses'] with a stable ordering, @@ -754,11 +767,9 @@ class Candidate(object): logging.info('%s not a candidate: guard avg too low (%lf)', self._fpr, self._guard) return False - if (MIN_CONSENSUS_WEIGHT is not None - and self._data['consensus_weight'] < MIN_CONSENSUS_WEIGHT): - logging.info('%s not a candidate: consensus weight %.0f too low, must ' + - 'be at least %.0f', self._fpr, - self._data['consensus_weight'], MIN_CONSENSUS_WEIGHT) + if (not self._data.has_key('consensus_weight') + or self._data['consensus_weight'] < 1): + logging.info('%s not a candidate: consensus weight invalid', self._fpr) return False return True @@ -889,6 +900,30 @@ class Candidate(object): ipv6 if has_ipv6 else value) return False + def cw_to_bw_factor(self): + # any relays with a missing or zero consensus weight are not candidates + # any relays with a missing advertised bandwidth have it set to zero + return self._data['advertised_bandwidth'] / self._data['consensus_weight'] + + # since advertised_bandwidth is reported by the relay, it can be gamed + # to avoid this, use the median consensus weight to bandwidth factor to + # estimate this relay's measured bandwidth, and make that the upper limit + def measured_bandwidth(self, median_cw_to_bw_factor): + cw_to_bw= median_cw_to_bw_factor + # Reduce exit bandwidth to make sure we're not overloading them + if self.is_exit(): + cw_to_bw *= EXIT_BANDWIDTH_FRACTION + measured_bandwidth = self._data['consensus_weight'] * cw_to_bw + if self._data['advertised_bandwidth'] != 0: + # limit advertised bandwidth (if available) to measured bandwidth + return min(measured_bandwidth, self._data['advertised_bandwidth']) + else: + return measured_bandwidth + + def set_measured_bandwidth(self, median_cw_to_bw_factor): + self._data['measured_bandwidth'] = self.measured_bandwidth( + median_cw_to_bw_factor) + def is_exit(self): return 'Exit' in self._data['flags'] @@ -1056,8 +1091,8 @@ class CandidateList(dict): logging.debug('Loading details document.') d = fetch('details', fields=('fingerprint,nickname,contact,last_changed_address_or_port,' + - 'consensus_weight,or_addresses,dir_address,' + - 'recommended_version,flags')) + 'consensus_weight,advertised_bandwidth,or_addresses,' + + 'dir_address,recommended_version,flags')) logging.debug('Loading details document done.') if not 'relays' in d: raise Exception("No relays found in document.") @@ -1083,15 +1118,24 @@ class CandidateList(dict): guard_count += 1 return guard_count - # Find fallbacks that fit the uptime, stability, and flags criteria + # Find fallbacks that fit the uptime, stability, and flags criteria, + # and make an array of them in self.fallbacks def compute_fallbacks(self): self.fallbacks = map(lambda x: self[x], - sorted( - filter(lambda x: self[x].is_candidate(), - self.keys()), - key=lambda x: self[x]._data['consensus_weight'], + filter(lambda x: self[x].is_candidate(), + self.keys())) + + # sort fallbacks by their consensus weight to advertised bandwidth factor, + # lowest to highest + # used to find the median cw_to_bw_factor() + def sort_fallbacks_by_cw_to_bw_factor(self): + self.fallbacks.sort(key=lambda x: self[x].cw_to_bw_factor()) + + # sort fallbacks by their measured bandwidth, highest to lowest + # calculate_measured_bandwidth before calling this + def sort_fallbacks_by_measured_bandwidth(self): + self.fallbacks.sort(key=lambda x: self[x].self._data['measured_bandwidth'], reverse=True) - ) @staticmethod def load_relaylist(file_name): @@ -1194,13 +1238,64 @@ class CandidateList(dict): return '/* Whitelist & blacklist excluded %d of %d candidates. */'%( excluded_count, initial_count) - def fallback_min_weight(self): + # calculate each fallback's measured bandwidth based on the median + # consensus weight to advertised bandwdith ratio + def calculate_measured_bandwidth(self): + self.sort_fallbacks_by_cw_to_bw_factor() + median_fallback = self.fallback_median(True) + median_cw_to_bw_factor = median_fallback.cw_to_bw_factor() + for f in self.fallbacks: + f.set_measured_bandwidth(median_cw_to_bw_factor) + + # remove relays with low measured bandwidth from the fallback list + # calculate_measured_bandwidth for each relay before calling this + def remove_low_bandwidth_relays(self): + if MIN_BANDWIDTH is None: + return + above_min_bw_fallbacks = [] + for f in self.fallbacks: + if f._data['measured_bandwidth'] >= MIN_BANDWIDTH: + above_min_bw_fallbacks.append(f) + else: + # the bandwidth we log here is limited by the relay's consensus weight + # as well as its adverttised bandwidth. See set_measured_bandwidth + # for details + logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' + + 'be at least %.1fMB/s', f._fpr, + f._data['measured_bandwidth']/(1024.0*1024.0), + MIN_BANDWIDTH/(1024.0*1024.0)) + self.fallbacks = above_min_bw_fallbacks + + # the minimum fallback in the list + # call one of the sort_fallbacks_* functions before calling this + def fallback_min(self): if len(self.fallbacks) > 0: return self.fallbacks[-1] else: return None - def fallback_max_weight(self): + # the median fallback in the list + # call one of the sort_fallbacks_* functions before calling this + def fallback_median(self, require_advertised_bandwidth): + # use the low-median when there are an evan number of fallbacks, + # for consistency with the bandwidth authorities + if len(self.fallbacks) > 0: + median_position = (len(self.fallbacks) - 1) / 2 + if not require_advertised_bandwidth: + return self.fallbacks[median_position] + # if we need advertised_bandwidth but this relay doesn't have it, + # move to a fallback with greater consensus weight until we find one + while not self.fallbacks[median_position]._data['advertised_bandwidth']: + median_position += 1 + if median_position >= len(self.fallbacks): + return None + return self.fallbacks[median_position] + else: + return None + + # the maximum fallback in the list + # call one of the sort_fallbacks_* functions before calling this + def fallback_max(self): if len(self.fallbacks) > 0: return self.fallbacks[0] else: @@ -1211,7 +1306,7 @@ class CandidateList(dict): # Report: # whether we checked consensus download times # the number of fallback directories (and limits/exclusions, if relevant) - # min & max fallback weights + # min & max fallback bandwidths # #error if below minimum count if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS: s = '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%( @@ -1243,11 +1338,12 @@ class CandidateList(dict): s += 'Excluded: %d (Eligible Count Exceeded Target Count)'%( eligible_count - fallback_count) s += '\n' - min_fb = self.fallback_min_weight() - min_weight = min_fb._data['consensus_weight'] - max_fb = self.fallback_max_weight() - max_weight = max_fb._data['consensus_weight'] - s += 'Consensus Weight Range: %d - %d'%(min_weight, max_weight) + min_fb = self.fallback_min() + min_bw = min_fb._data['measured_bandwidth'] + max_fb = self.fallback_max() + max_bw = max_fb._data['measured_bandwidth'] + s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0), + max_bw/(1024.0*1024.0)) s += '\n' s += '*/' if fallback_count < MIN_FALLBACK_COUNT: @@ -1293,6 +1389,14 @@ def list_fallbacks(): print candidates.summarise_filters(initial_count, excluded_count) eligible_count = len(candidates.fallbacks) + # calculate the measured bandwidth of each relay, + # then remove low-bandwidth relays + candidates.calculate_measured_bandwidth() + candidates.remove_low_bandwidth_relays() + # make sure the list is sorted by bandwidth when we output it + # so that we include the active fallbacks with the greatest bandwidth + candidates.sort_fallbacks_by_measured_bandwidth() + # print the raw fallback list #for x in candidates.fallbacks: # print x.fallbackdir_line(True) |