summaryrefslogtreecommitdiff
path: root/scripts/maint/updateFallbackDirs.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maint/updateFallbackDirs.py')
-rwxr-xr-xscripts/maint/updateFallbackDirs.py75
1 files changed, 74 insertions, 1 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 4e4000dceb..d1f50c70c5 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -47,7 +47,7 @@ import copy
import re
from stem.descriptor import DocumentHandler
-from stem.descriptor.remote import get_consensus
+from stem.descriptor.remote import get_consensus, get_server_descriptors, MAX_FINGERPRINTS
import logging
logging.root.name = ''
@@ -565,6 +565,7 @@ class Candidate(object):
if not self.has_ipv6():
logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
self._compute_version()
+ self._extra_info_cache = None
def _stable_sort_or_addresses(self):
# replace self._data['or_addresses'] with a stable ordering,
@@ -1332,6 +1333,7 @@ class Candidate(object):
# "address:dirport orport=port id=fingerprint"
# "[ipv6=addr:orport]"
# /* nickname=name */
+ # /* extrainfo={0,1} */
# ,
#
# Do we want a C string, or a commented-out string?
@@ -1362,6 +1364,14 @@ class Candidate(object):
if not comment_string:
s += ' */'
s += '\n'
+ # if we know that the fallback is an extrainfo cache, flag it
+ # and if we don't know, assume it is not
+ if not comment_string:
+ s += '/* '
+ s += 'extrainfo=%d'%(1 if self._extra_info_cache else 0)
+ if not comment_string:
+ s += ' */'
+ s += '\n'
s += ','
if comment_string:
s += '\n'
@@ -1747,6 +1757,53 @@ class CandidateList(dict):
self.fallbacks = family_limit_fallbacks
return original_count - len(self.fallbacks)
+ # try once to get the descriptors for fingerprint_list using stem
+ # returns an empty list on exception
+ @staticmethod
+ def get_fallback_descriptors_once(fingerprint_list):
+ desc_list = get_server_descriptors(fingerprints=fingerprint_list).run(suppress=True)
+ return desc_list
+
+ # try up to max_retries times to get the descriptors for fingerprint_list
+ # using stem. Stops retrying when all descriptors have been retrieved.
+ # returns a list containing the descriptors that were retrieved
+ @staticmethod
+ def get_fallback_descriptors(fingerprint_list, max_retries=5):
+ # we can't use stem's retries=, because we want to support more than 96
+ # descriptors
+ #
+ # add an attempt for every MAX_FINGERPRINTS (or part thereof) in the list
+ max_retries += (len(fingerprint_list) + MAX_FINGERPRINTS - 1) / MAX_FINGERPRINTS
+ remaining_list = fingerprint_list
+ desc_list = []
+ for _ in xrange(max_retries):
+ if len(remaining_list) == 0:
+ break
+ new_desc_list = CandidateList.get_fallback_descriptors_once(remaining_list[0:MAX_FINGERPRINTS])
+ for d in new_desc_list:
+ try:
+ remaining_list.remove(d.fingerprint)
+ except ValueError:
+ # warn and ignore if a directory mirror returned a bad descriptor
+ logging.warning("Directory mirror returned unwanted descriptor %s, ignoring",
+ d.fingerprint)
+ continue
+ desc_list.append(d)
+ return desc_list
+
+ # find the fallbacks that cache extra-info documents
+ # Onionoo doesn't know this, so we have to use stem
+ def mark_extra_info_caches(self):
+ fingerprint_list = [ f._fpr for f in self.fallbacks ]
+ logging.info("Downloading fallback descriptors to find extra-info caches")
+ desc_list = CandidateList.get_fallback_descriptors(fingerprint_list)
+ for d in desc_list:
+ self[d.fingerprint]._extra_info_cache = d.extra_info_cache
+ missing_descriptor_list = [ f._fpr for f in self.fallbacks
+ if f._extra_info_cache is None ]
+ for f in missing_descriptor_list:
+ logging.warning("No descriptor for {}. Assuming extrainfo=0.".format(f))
+
# try a download check on each fallback candidate in order
# stop after max_count successful downloads
# but don't remove any candidates from the array
@@ -1994,6 +2051,18 @@ class CandidateList(dict):
CandidateList.describe_percentage(dir_count,
fallback_count)))
+ # return a list of fallbacks which cache extra-info documents
+ def fallbacks_with_extra_info_cache(self):
+ return filter(lambda x: x._extra_info_cache, self.fallbacks)
+
+ # log a message about the proportion of fallbacks that cache extra-info docs
+ def describe_fallback_extra_info_caches(self):
+ extra_info_falback_count = len(self.fallbacks_with_extra_info_cache())
+ fallback_count = len(self.fallbacks)
+ logging.warning('%s of fallbacks cache extra-info documents'%(
+ CandidateList.describe_percentage(extra_info_falback_count,
+ fallback_count)))
+
# return a list of fallbacks which have the Exit flag
def fallbacks_with_exit(self):
return filter(lambda x: x.is_exit(), self.fallbacks)
@@ -2193,6 +2262,9 @@ def list_fallbacks(whitelist, blacklist):
'This may take some time.')
failed_count = candidates.perform_download_consensus_checks(max_count)
+ # work out which fallbacks cache extra-infos
+ candidates.mark_extra_info_caches()
+
# analyse and log interesting diversity metrics
# like netblock, ports, exit, IPv4-only
# (we can't easily analyse AS, and it's hard to accurately analyse country)
@@ -2201,6 +2273,7 @@ def list_fallbacks(whitelist, blacklist):
if HAVE_IPADDRESS:
candidates.describe_fallback_netblocks()
candidates.describe_fallback_ports()
+ candidates.describe_fallback_extra_info_caches()
candidates.describe_fallback_exit_flag()
# output C comments summarising the fallback selection process