diff options
Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-x | scripts/maint/updateFallbackDirs.py | 302 |
1 files changed, 225 insertions, 77 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 0ea3992d8f..14372d0e83 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -18,8 +18,8 @@ # Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package) # for netblock analysis. # -# Then read the logs to make sure the fallbacks aren't dominated by a single -# netblock or port. +# After running this script, read the logs to make sure the fallbacks aren't +# dominated by a single netblock or port. # Script by weasel, April 2015 # Portions by gsathya & karsten, 2013 @@ -39,8 +39,6 @@ import urllib import urllib2 import hashlib import dateutil.parser -# bson_lazy provides bson -#from bson import json_util import copy import re @@ -100,19 +98,29 @@ MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS # Clients have been using microdesc consensuses by default for a while now DOWNLOAD_MICRODESC_CONSENSUS = True -# If a relay delivers an expired consensus, if it expired less than this many -# seconds ago, we still allow the relay. This should never be less than -90, -# as all directory mirrors should have downloaded a consensus 90 minutes -# before it expires. It should never be more than 24 hours, because clients -# reject consensuses that are older than REASONABLY_LIVE_TIME. -# For the consensus expiry check to be accurate, the machine running this -# script needs an accurate clock. +# If a relay delivers an invalid consensus, if it will become valid less than +# this many seconds in the future, or expired less than this many seconds ago, +# accept the relay as a fallback. For the consensus expiry check to be +# accurate, the machine running this script needs an accurate clock. # -# Relays on 0.3.0 and later return a 404 when they are about to serve an -# expired consensus. This makes them fail the download check. -# We use a tolerance of 0, so that 0.2.x series relays also fail the download -# check if they serve an expired consensus. -CONSENSUS_EXPIRY_TOLERANCE = 0 +# Relays on 0.3.0 and later return a 404 when they are about to serve a +# consensus that expired more than 24 hours ago. 0.2.9 and earlier relays +# will serve consensuses that are very old. +# +# Relays on 0.3.5.6-rc? and later return a 404 when they are about to serve a +# consensus that will become valid more than 24 hours in the future. Older +# relays don't serve future consensuses. +# +# A 404 makes relays fail the download check. We use a tolerance of 24 hours, +# so that 0.2.9 relays also fail the download check if they serve a consensus +# that is not reasonably live. +# +# REASONABLY_LIVE_TIME should never be more than Tor's REASONABLY_LIVE_TIME, +# (24 hours), because clients reject consensuses that are older than that. +# Clients on 0.3.5.5-alpha? and earlier also won't select guards from +# consensuses that have expired, but can bootstrap if they already have guards +# in their state file. +REASONABLY_LIVE_TIME = 24*60*60 # Output fallback name, flags, bandwidth, and ContactInfo in a C comment? OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False @@ -912,61 +920,181 @@ class Candidate(object): return False return True - def is_in_whitelist(self, relaylist): - """ A fallback matches if each key in the whitelist line matches: + def id_matches(self, id, exact=False): + """ Does this fallback's id match id? + exact is ignored. """ + return self._fpr == id + + def ipv4_addr_matches(self, ipv4_addr, exact=False): + """ Does this fallback's IPv4 address match ipv4_addr? + exact is ignored. """ + return self.dirip == ipv4_addr + + def ipv4_dirport_matches(self, ipv4_dirport, exact=False): + """ Does this fallback's IPv4 dirport match ipv4_dirport? + If exact is False, always return True. """ + if exact: + return self.dirport == int(ipv4_dirport) + else: + return True + + def ipv4_and_dirport_matches(self, ipv4_addr, ipv4_dirport, exact=False): + """ Does this fallback's IPv4 address match ipv4_addr? + If exact is True, also check ipv4_dirport. """ + ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact) + if exact: + return ipv4_match and self.ipv4_dirport_matches(ipv4_dirport, + exact=exact) + else: + return ipv4_match + + def ipv4_orport_matches(self, ipv4_orport, exact=False): + """ Does this fallback's IPv4 orport match ipv4_orport? + If exact is False, always return True. """ + if exact: + return self.orport == int(ipv4_orport) + else: + return True + + def ipv4_and_orport_matches(self, ipv4_addr, ipv4_orport, exact=False): + """ Does this fallback's IPv4 address match ipv4_addr? + If exact is True, also check ipv4_orport. """ + ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact) + if exact: + return ipv4_match and self.ipv4_orport_matches(ipv4_orport, + exact=exact) + else: + return ipv4_match + + def ipv6_addr_matches(self, ipv6_addr, exact=False): + """ Does this fallback's IPv6 address match ipv6_addr? + Both addresses must be present to match. + exact is ignored. """ + if self.has_ipv6() and ipv6_addr is not None: + # Check that we have a bracketed IPv6 address without a port + assert(ipv6_addr.startswith('[') and ipv6_addr.endswith(']')) + return self.ipv6addr == ipv6_addr + else: + return False + + def ipv6_orport_matches(self, ipv6_orport, exact=False): + """ Does this fallback's IPv6 orport match ipv6_orport? + Both ports must be present to match. + If exact is False, always return True. """ + if exact: + return (self.has_ipv6() and ipv6_orport is not None and + self.ipv6orport == int(ipv6_orport)) + else: + return True + + def ipv6_and_orport_matches(self, ipv6_addr, ipv6_orport, exact=False): + """ Does this fallback's IPv6 address match ipv6_addr? + If exact is True, also check ipv6_orport. """ + ipv6_match = self.ipv6_addr_matches(ipv6_addr, exact=exact) + if exact: + return ipv6_match and self.ipv6_orport_matches(ipv6_orport, + exact=exact) + else: + return ipv6_match + + def entry_matches_exact(self, entry): + """ Is entry an exact match for this fallback? + A fallback is an exact match for entry if each key in entry matches: ipv4 dirport orport id - ipv6 address and port (if present) + ipv6 address and port (if present in the fallback or the whitelist) If the fallback has an ipv6 key, the whitelist line must also have - it, and vice versa, otherwise they don't match. """ - ipv6 = None - if self.has_ipv6(): - ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport) - for entry in relaylist: - if entry['id'] != self._fpr: - # can't log here unless we match an IP and port, because every relay's - # fingerprint is compared to every entry's fingerprint - if entry['ipv4'] == self.dirip and int(entry['orport']) == self.orport: - logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' + - '%s?', entry['id'], self.dirip, self.orport, - self._fpr) - if self.has_ipv6() and entry.has_key('ipv6') and entry['ipv6'] == ipv6: - logging.warning('%s excluded: has OR %s changed fingerprint to ' + - '%s?', entry['id'], ipv6, self._fpr) - continue - if entry['ipv4'] != self.dirip: - logging.warning('%s excluded: has it changed IPv4 from %s to %s?', - self._fpr, entry['ipv4'], self.dirip) - continue - if int(entry['dirport']) != self.dirport: - logging.warning('%s excluded: has it changed DirPort from %s:%d to ' + - '%s:%d?', self._fpr, self.dirip, int(entry['dirport']), - self.dirip, self.dirport) - continue - if int(entry['orport']) != self.orport: - logging.warning('%s excluded: has it changed ORPort from %s:%d to ' + - '%s:%d?', self._fpr, self.dirip, int(entry['orport']), - self.dirip, self.orport) - continue - if entry.has_key('ipv6') and self.has_ipv6(): - # if both entry and fallback have an ipv6 address, compare them - if entry['ipv6'] != ipv6: - logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' + - 'to %s?', self._fpr, entry['ipv6'], ipv6) - continue - # if the fallback has an IPv6 address but the whitelist entry - # doesn't, or vice versa, the whitelist entry doesn't match - elif entry.has_key('ipv6') and not self.has_ipv6(): - logging.warning('%s excluded: has it lost its former IPv6 address %s?', - self._fpr, entry['ipv6']) - continue - elif not entry.has_key('ipv6') and self.has_ipv6(): - logging.warning('%s excluded: has it gained an IPv6 address %s?', - self._fpr, ipv6) - continue + it, otherwise they don't match. + + Logs a warning-level message if the fallback would be an exact match, + but one of the id, ipv4, ipv4 orport, ipv4 dirport, or ipv6 orport + have changed. """ + if not self.id_matches(entry['id'], exact=True): + # can't log here unless we match an IP and port, because every relay's + # fingerprint is compared to every entry's fingerprint + if self.ipv4_and_orport_matches(entry['ipv4'], + entry['orport'], + exact=True): + logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' + + '%s?', entry['id'], self.dirip, self.orport, + self._fpr) + if self.ipv6_and_orport_matches(entry.get('ipv6_addr'), + entry.get('ipv6_orport'), + exact=True): + logging.warning('%s excluded: has OR %s changed fingerprint to ' + + '%s?', entry['id'], entry['ipv6'], self._fpr) + return False + if not self.ipv4_addr_matches(entry['ipv4'], exact=True): + logging.warning('%s excluded: has it changed IPv4 from %s to %s?', + self._fpr, entry['ipv4'], self.dirip) + return False + if not self.ipv4_dirport_matches(entry['dirport'], exact=True): + logging.warning('%s excluded: has it changed DirPort from %s:%d to ' + + '%s:%d?', self._fpr, self.dirip, int(entry['dirport']), + self.dirip, self.dirport) + return False + if not self.ipv4_orport_matches(entry['orport'], exact=True): + logging.warning('%s excluded: has it changed ORPort from %s:%d to ' + + '%s:%d?', self._fpr, self.dirip, int(entry['orport']), + self.dirip, self.orport) + return False + if entry.has_key('ipv6') and self.has_ipv6(): + # if both entry and fallback have an ipv6 address, compare them + if not self.ipv6_and_orport_matches(entry['ipv6_addr'], + entry['ipv6_orport'], + exact=True): + logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' + + 'to %s:%d?', self._fpr, entry['ipv6'], + self.ipv6addr, self.ipv6orport) + return False + # if the fallback has an IPv6 address but the whitelist entry + # doesn't, or vice versa, the whitelist entry doesn't match + elif entry.has_key('ipv6') and not self.has_ipv6(): + logging.warning('%s excluded: has it lost its former IPv6 address %s?', + self._fpr, entry['ipv6']) + return False + elif not entry.has_key('ipv6') and self.has_ipv6(): + logging.warning('%s excluded: has it gained an IPv6 address %s:%d?', + self._fpr, self.ipv6addr, self.ipv6orport) + return False + return True + + def entry_matches_fuzzy(self, entry): + """ Is entry a fuzzy match for this fallback? + A fallback is a fuzzy match for entry if at least one of these keys + in entry matches: + id + ipv4 + ipv6 (if present in both the fallback and whitelist) + The ports and nickname are ignored. Missing or extra ipv6 addresses + are ignored. + + Doesn't log any warning messages. """ + if self.id_matches(entry['id'], exact=False): return True + if self.ipv4_addr_matches(entry['ipv4'], exact=False): + return True + if entry.has_key('ipv6') and self.has_ipv6(): + # if both entry and fallback have an ipv6 address, compare them + if self.ipv6_addr_matches(entry['ipv6_addr'], exact=False): + return True + return False + + def is_in_whitelist(self, relaylist, exact=False): + """ If exact is True (existing fallback list), check if this fallback is + an exact match for any whitelist entry, using entry_matches_exact(). + + If exact is False (new fallback whitelist), check if this fallback is + a fuzzy match for any whitelist entry, using entry_matches_fuzzy(). """ + for entry in relaylist: + if exact: + if self.entry_matches_exact(entry): + return True + else: + if self.entry_matches_fuzzy(entry): + return True return False def cw_to_bw_factor(self): @@ -1124,6 +1252,7 @@ class Candidate(object): ).run()[0] end = datetime.datetime.utcnow() time_since_expiry = (end - consensus.valid_until).total_seconds() + time_until_valid = (consensus.valid_after - end).total_seconds() except Exception, stem_error: end = datetime.datetime.utcnow() log_excluded('Unable to retrieve a consensus from %s: %s', nickname, @@ -1141,8 +1270,17 @@ class Candidate(object): download_failed = True elif (time_since_expiry > 0): status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry)) - if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE: - status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE) + if time_since_expiry <= REASONABLY_LIVE_TIME: + status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME) + level = logging.INFO + else: + status += ', invalid' + level = logging.WARNING + download_failed = True + elif (time_until_valid > 0): + status = 'future consensus, valid in %ds'%(int(time_until_valid)) + if time_until_valid <= REASONABLY_LIVE_TIME: + status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME) level = logging.INFO else: status += ', invalid' @@ -1400,7 +1538,7 @@ class CandidateList(dict): each line's key/value pairs are placed in a dictonary, (of string -> string key/value pairs), and these dictionaries are placed in an array. - comments start with # and are ignored """ + comments start with # and are ignored. """ file_data = file_obj['data'] file_name = file_obj['name'] relaylist = [] @@ -1440,18 +1578,28 @@ class CandidateList(dict): relay_entry['dirport'] = ipv4_maybe_dirport_split[1] elif kvl == 2: relay_entry[key_value_split[0]] = key_value_split[1] + # split ipv6 addresses and orports + if key_value_split[0] == 'ipv6': + ipv6_orport_split = key_value_split[1].rsplit(':', 1) + ipv6l = len(ipv6_orport_split) + if ipv6l != 2: + print '#error Bad %s IPv6 item: %s, format is [ipv6]:orport.'%( + file_name, item) + relay_entry['ipv6_addr'] = ipv6_orport_split[0] + relay_entry['ipv6_orport'] = ipv6_orport_split[1] relaylist.append(relay_entry) return relaylist - # apply the fallback whitelist - def apply_filter_lists(self, whitelist_obj): + def apply_filter_lists(self, whitelist_obj, exact=False): + """ Apply the fallback whitelist_obj to this fallback list, + passing exact to is_in_whitelist(). """ excluded_count = 0 logging.debug('Applying whitelist') # parse the whitelist whitelist = self.load_relaylist(whitelist_obj) filtered_fallbacks = [] for f in self.fallbacks: - in_whitelist = f.is_in_whitelist(whitelist) + in_whitelist = f.is_in_whitelist(whitelist, exact=exact) if in_whitelist: # include filtered_fallbacks.append(f) @@ -2064,14 +2212,14 @@ def process_existing(): logging.getLogger('stem').setLevel(logging.INFO) whitelist = {'data': parse_fallback_file(FALLBACK_FILE_NAME), 'name': FALLBACK_FILE_NAME} - list_fallbacks(whitelist) + list_fallbacks(whitelist, exact=True) def process_default(): logging.basicConfig(level=logging.WARNING) logging.getLogger('stem').setLevel(logging.WARNING) whitelist = {'data': read_from_file(WHITELIST_FILE_NAME, MAX_LIST_FILE_SIZE), 'name': WHITELIST_FILE_NAME} - list_fallbacks(whitelist) + list_fallbacks(whitelist, exact=False) ## Main Function def main(): @@ -2092,10 +2240,10 @@ def log_excluded(msg, *args): else: logging.info(msg, *args) -def list_fallbacks(whitelist): +def list_fallbacks(whitelist, exact=False): """ Fetches required onionoo documents and evaluates the - fallback directory criteria for each of the relays """ - + fallback directory criteria for each of the relays, + passing exact to apply_filter_lists(). """ print "/* type=fallback */" print ("/* version={} */" .format(cleanse_c_multiline_comment(FALLBACK_FORMAT_VERSION))) @@ -2135,7 +2283,7 @@ def list_fallbacks(whitelist): # warning that the details have changed from those in the whitelist. # instead, there will be an info-level log during the eligibility check. initial_count = len(candidates.fallbacks) - excluded_count = candidates.apply_filter_lists(whitelist) + excluded_count = candidates.apply_filter_lists(whitelist, exact=exact) print candidates.summarise_filters(initial_count, excluded_count) eligible_count = len(candidates.fallbacks) |