diff options
Diffstat (limited to 'scripts/maint')
-rwxr-xr-x | scripts/maint/updateFallbackDirs.py | 102 |
1 files changed, 67 insertions, 35 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 53676c08fa..5c9b320ee4 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -27,7 +27,7 @@ import dateutil.parser #from bson import json_util import logging -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.DEBUG) ## Top-Level Configuration @@ -91,7 +91,7 @@ PERMITTED_BADEXIT = .00 FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else 0.2 # Limit the number of fallbacks (eliminating lowest by weight) -MAX_FALLBACK_COUNT = 500 +MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 500 # Emit a C #error if the number of fallbacks is below MIN_FALLBACK_COUNT = 100 @@ -266,6 +266,17 @@ def load_json_from_file(json_file_name): ## OnionOO Functions +def datestr_to_datetime(datestr): + # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT + if datestr is not None: + dt = dateutil.parser.parse(datestr) + else: + # Never modified - use start of epoch + dt = datetime.datetime.utcfromtimestamp(0) + # strip any timezone out (in case they're supported in future) + dt = dt.replace(tzinfo=None) + return dt + def onionoo_fetch(what, **kwargs): params = kwargs params['type'] = 'relay' @@ -304,37 +315,42 @@ def onionoo_fetch(what, **kwargs): if last_mod_date is not None: request.add_header('If-modified-since', last_mod_date) - # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT - if last_mod_date is not None: - last_mod = dateutil.parser.parse(last_mod_date) - else: - # Never modified - use start of epoch - last_mod = datetime.datetime.utcfromtimestamp(0) - # strip any timezone out (in case they're supported in future) - last_mod = last_mod.replace(tzinfo=None) + # Parse last modified date + last_mod = datestr_to_datetime(last_mod_date) + + # Not Modified and still recent enough to be useful + # Onionoo / Globe used to use 6 hours, but we can afford a day + required_freshness = datetime.datetime.utcnow() + # strip any timezone out (to match dateutil.parser) + required_freshness = required_freshness.replace(tzinfo=None) + required_freshness -= datetime.timedelta(hours=24) + # Make the OnionOO request response_code = 0 try: response = urllib2.urlopen(request) response_code = response.getcode() except urllib2.HTTPError, error: response_code = error.code - # strip any timezone out (to match dateutil.parser) - six_hours_ago = datetime.datetime.utcnow() - six_hours_ago = six_hours_ago.replace(tzinfo=None) - six_hours_ago -= datetime.timedelta(hours=6) - # Not Modified and still recent enough to be useful (Globe uses 6 hours) - if response_code == 304: - if last_mod < six_hours_ago: - raise Exception("Outdated data from " + url + ": " - + str(error.code) + ": " + error.reason) - else: - pass + if response_code == 304: # not modified + pass else: raise Exception("Could not get " + url + ": " + str(error.code) + ": " + error.reason) if response_code == 200: # OK + last_mod = datestr_to_datetime(response.info().get('Last-Modified')) + + # Check for freshness + if last_mod < required_freshness: + if last_mod_date is not None: + date_message = "Outdated data: last updated " + last_mod_date + else: + date_message = "No data: never downloaded " + raise Exception(date_message + " from " + url) + + # Process the data + if response_code == 200: # OK response_json = load_possibly_compressed_response_json(response) @@ -579,9 +595,15 @@ class Candidate(object): %(p, which)) for v in reversed(h['values']): if (this_ts <= newest): + agt1 = now - this_ts + agt2 = interval + agetmp1 = (agt1.microseconds + (agt1.seconds + agt1.days * 24 * 3600) + * 10**6) / 10**6 + agetmp2 = (agt2.microseconds + (agt2.seconds + agt2.days * 24 * 3600) + * 10**6) / 10**6 generic_history.append( - { 'age': (now - this_ts).total_seconds(), - 'length': interval.total_seconds(), + { 'age': agetmp1, + 'length': agetmp2, 'value': v }) newest = this_ts @@ -599,6 +621,8 @@ class Candidate(object): def _avg_generic_history(generic_history): a = [] for i in generic_history: + if i['age'] > (ADDRESS_AND_PORT_STABLE_DAYS * 24 * 3600): + continue if (i['length'] is not None and i['age'] is not None and i['value'] is not None): @@ -608,7 +632,11 @@ class Candidate(object): sv = math.fsum(map(lambda x: x[0], a)) sw = math.fsum(map(lambda x: x[1], a)) - return sv/sw + if sw == 0.0: + svw = 0.0 + else: + svw = sv/sw + return svw def _add_generic_history(self, history): periods = r['read_history'].keys() @@ -659,10 +687,6 @@ class Candidate(object): logging.debug('%s not a candidate: running avg too low (%lf)', self._fpr, self._running) return False - if self._guard < CUTOFF_GUARD: - logging.debug('%s not a candidate: guard avg too low (%lf)', - self._fpr, self._guard) - return False if self._v2dir < CUTOFF_V2DIR: logging.debug('%s not a candidate: v2dir avg too low (%lf)', self._fpr, self._v2dir) @@ -675,6 +699,10 @@ class Candidate(object): if (not self._data.has_key('recommended_version') or not self._data['recommended_version']): return False + if self._guard < CUTOFF_GUARD: + logging.debug('%s not a candidate: guard avg too low (%lf)', + self._fpr, self._guard) + return False return True def is_in_whitelist(self, relaylist): @@ -998,7 +1026,8 @@ class CandidateList(dict): # starting with the lowest-weighted fallbacks # total_weight should be recalculated after calling this def exclude_excess_fallbacks(self): - self.fallbacks = self.fallbacks[:MAX_FALLBACK_COUNT] + if MAX_FALLBACK_COUNT is not None: + self.fallbacks = self.fallbacks[:MAX_FALLBACK_COUNT] # Clamp the weight of all fallbacks to MAX_WEIGHT_FRACTION * total_weight # fallbacks are kept sorted, but since excessive weights are reduced to @@ -1069,15 +1098,15 @@ class CandidateList(dict): else: fallback_proportion = ' (%d * %f)'%(guard_count, FALLBACK_PROPORTION_OF_GUARDS) - s += 'Final Count: %d (Eligible %d, Usable %d, Target %d%s, '%( + s += 'Final Count: %d (Eligible %d, Usable %d, Target %d%s'%( min(max_count, fallback_count), eligible_count, fallback_count, target_count, fallback_proportion) - s += 'Clamped to %d)'%( - MAX_FALLBACK_COUNT) - s += '\n' + if MAX_FALLBACK_COUNT is not None: + s += ', Clamped to %d'%(MAX_FALLBACK_COUNT) + s += ')\n' if fallback_count < MIN_FALLBACK_COUNT: s += '*/' s += '\n' @@ -1147,13 +1176,16 @@ def list_fallbacks(): guard_count = candidates.count_guards() if FALLBACK_PROPORTION_OF_GUARDS is None: - target_count = MAX_FALLBACK_COUNT + target_count = guard_count else: target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS) # the maximum number of fallbacks is the least of: # - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count) # - the maximum fallback count (MAX_FALLBACK_COUNT) - max_count = min(target_count, MAX_FALLBACK_COUNT) + if MAX_FALLBACK_COUNT is None: + max_count = guard_count + else: + max_count = min(target_count, MAX_FALLBACK_COUNT) candidates.compute_fallbacks() |