about summary refs log tree commit diff
path: root/scripts/maint
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/maint')
-rwxr-xr-x scripts/maint/updateFallbackDirs.py 102
1 files changed, 67 insertions, 35 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 53676c08fa..5c9b320ee4 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -27,7 +27,7 @@ import dateutil.parser
#from bson import json_util
import logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.DEBUG)
## Top-Level Configuration
@@ -91,7 +91,7 @@ PERMITTED_BADEXIT = .00
FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else 0.2
# Limit the number of fallbacks (eliminating lowest by weight)
-MAX_FALLBACK_COUNT = 500
+MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 500
# Emit a C #error if the number of fallbacks is below
MIN_FALLBACK_COUNT = 100
@@ -266,6 +266,17 @@ def load_json_from_file(json_file_name):
## OnionOO Functions
+def datestr_to_datetime(datestr):
+ # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
+ if datestr is not None:
+ dt = dateutil.parser.parse(datestr)
+ else:
+ # Never modified - use start of epoch
+ dt = datetime.datetime.utcfromtimestamp(0)
+ # strip any timezone out (in case they're supported in future)
+ dt = dt.replace(tzinfo=None)
+ return dt
+
def onionoo_fetch(what, **kwargs):
params = kwargs
params['type'] = 'relay'
@@ -304,37 +315,42 @@ def onionoo_fetch(what, **kwargs):
if last_mod_date is not None:
request.add_header('If-modified-since', last_mod_date)
- # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
- if last_mod_date is not None:
- last_mod = dateutil.parser.parse(last_mod_date)
- else:
- # Never modified - use start of epoch
- last_mod = datetime.datetime.utcfromtimestamp(0)
- # strip any timezone out (in case they're supported in future)
- last_mod = last_mod.replace(tzinfo=None)
+ # Parse last modified date
+ last_mod = datestr_to_datetime(last_mod_date)
+
+ # Not Modified and still recent enough to be useful
+ # Onionoo / Globe used to use 6 hours, but we can afford a day
+ required_freshness = datetime.datetime.utcnow()
+ # strip any timezone out (to match dateutil.parser)
+ required_freshness = required_freshness.replace(tzinfo=None)
+ required_freshness -= datetime.timedelta(hours=24)
+ # Make the OnionOO request
response_code = 0
try:
response = urllib2.urlopen(request)
response_code = response.getcode()
except urllib2.HTTPError, error:
response_code = error.code
- # strip any timezone out (to match dateutil.parser)
- six_hours_ago = datetime.datetime.utcnow()
- six_hours_ago = six_hours_ago.replace(tzinfo=None)
- six_hours_ago -= datetime.timedelta(hours=6)
- # Not Modified and still recent enough to be useful (Globe uses 6 hours)
- if response_code == 304:
- if last_mod < six_hours_ago:
- raise Exception("Outdated data from " + url + ": "
- + str(error.code) + ": " + error.reason)
- else:
- pass
+ if response_code == 304: # not modified
+ pass
else:
raise Exception("Could not get " + url + ": "
+ str(error.code) + ": " + error.reason)
if response_code == 200: # OK
+ last_mod = datestr_to_datetime(response.info().get('Last-Modified'))
+
+ # Check for freshness
+ if last_mod < required_freshness:
+ if last_mod_date is not None:
+ date_message = "Outdated data: last updated " + last_mod_date
+ else:
+ date_message = "No data: never downloaded "
+ raise Exception(date_message + " from " + url)
+
+ # Process the data
+ if response_code == 200: # OK
response_json = load_possibly_compressed_response_json(response)
@@ -579,9 +595,15 @@ class Candidate(object):
%(p, which))
for v in reversed(h['values']):
if (this_ts <= newest):
+ agt1 = now - this_ts
+ agt2 = interval
+ agetmp1 = (agt1.microseconds + (agt1.seconds + agt1.days * 24 * 3600)
+ * 10**6) / 10**6
+ agetmp2 = (agt2.microseconds + (agt2.seconds + agt2.days * 24 * 3600)
+ * 10**6) / 10**6
generic_history.append(
- { 'age': (now - this_ts).total_seconds(),
- 'length': interval.total_seconds(),
+ { 'age': agetmp1,
+ 'length': agetmp2,
'value': v
})
newest = this_ts
@@ -599,6 +621,8 @@ class Candidate(object):
def _avg_generic_history(generic_history):
a = []
for i in generic_history:
+ if i['age'] > (ADDRESS_AND_PORT_STABLE_DAYS * 24 * 3600):
+ continue
if (i['length'] is not None
and i['age'] is not None
and i['value'] is not None):
@@ -608,7 +632,11 @@ class Candidate(object):
sv = math.fsum(map(lambda x: x[0], a))
sw = math.fsum(map(lambda x: x[1], a))
- return sv/sw
+ if sw == 0.0:
+ svw = 0.0
+ else:
+ svw = sv/sw
+ return svw
def _add_generic_history(self, history):
periods = r['read_history'].keys()
@@ -659,10 +687,6 @@ class Candidate(object):
logging.debug('%s not a candidate: running avg too low (%lf)',
self._fpr, self._running)
return False
- if self._guard < CUTOFF_GUARD:
- logging.debug('%s not a candidate: guard avg too low (%lf)',
- self._fpr, self._guard)
- return False
if self._v2dir < CUTOFF_V2DIR:
logging.debug('%s not a candidate: v2dir avg too low (%lf)',
self._fpr, self._v2dir)
@@ -675,6 +699,10 @@ class Candidate(object):
if (not self._data.has_key('recommended_version')
or not self._data['recommended_version']):
return False
+ if self._guard < CUTOFF_GUARD:
+ logging.debug('%s not a candidate: guard avg too low (%lf)',
+ self._fpr, self._guard)
+ return False
return True
def is_in_whitelist(self, relaylist):
@@ -998,7 +1026,8 @@ class CandidateList(dict):
# starting with the lowest-weighted fallbacks
# total_weight should be recalculated after calling this
def exclude_excess_fallbacks(self):
- self.fallbacks = self.fallbacks[:MAX_FALLBACK_COUNT]
+ if MAX_FALLBACK_COUNT is not None:
+ self.fallbacks = self.fallbacks[:MAX_FALLBACK_COUNT]
# Clamp the weight of all fallbacks to MAX_WEIGHT_FRACTION * total_weight
# fallbacks are kept sorted, but since excessive weights are reduced to
@@ -1069,15 +1098,15 @@ class CandidateList(dict):
else:
fallback_proportion = ' (%d * %f)'%(guard_count,
FALLBACK_PROPORTION_OF_GUARDS)
- s += 'Final Count: %d (Eligible %d, Usable %d, Target %d%s, '%(
+ s += 'Final Count: %d (Eligible %d, Usable %d, Target %d%s'%(
min(max_count, fallback_count),
eligible_count,
fallback_count,
target_count,
fallback_proportion)
- s += 'Clamped to %d)'%(
- MAX_FALLBACK_COUNT)
- s += '\n'
+ if MAX_FALLBACK_COUNT is not None:
+ s += ', Clamped to %d'%(MAX_FALLBACK_COUNT)
+ s += ')\n'
if fallback_count < MIN_FALLBACK_COUNT:
s += '*/'
s += '\n'
@@ -1147,13 +1176,16 @@ def list_fallbacks():
guard_count = candidates.count_guards()
if FALLBACK_PROPORTION_OF_GUARDS is None:
- target_count = MAX_FALLBACK_COUNT
+ target_count = guard_count
else:
target_count = int(guard_count * FALLBACK_PROPORTION_OF_GUARDS)
# the maximum number of fallbacks is the least of:
# - the target fallback count (FALLBACK_PROPORTION_OF_GUARDS * guard count)
# - the maximum fallback count (MAX_FALLBACK_COUNT)
- max_count = min(target_count, MAX_FALLBACK_COUNT)
+ if MAX_FALLBACK_COUNT is None:
+ max_count = guard_count
+ else:
+ max_count = min(target_count, MAX_FALLBACK_COUNT)
candidates.compute_fallbacks()