diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/maint/updateFallbackDirs.py | 54 |
1 files changed, 35 insertions, 19 deletions
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py index 20bfd1896f..3e8a22bc12 100755 --- a/scripts/maint/updateFallbackDirs.py +++ b/scripts/maint/updateFallbackDirs.py @@ -266,6 +266,17 @@ def load_json_from_file(json_file_name): ## OnionOO Functions +def datestr_to_datetime(datestr): + # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT + if datestr is not None: + dt = dateutil.parser.parse(datestr) + else: + # Never modified - use start of epoch + dt = datetime.datetime.utcfromtimestamp(0) + # strip any timezone out (in case they're supported in future) + dt = dt.replace(tzinfo=None) + return dt + def onionoo_fetch(what, **kwargs): params = kwargs params['type'] = 'relay' @@ -304,37 +315,42 @@ def onionoo_fetch(what, **kwargs): if last_mod_date is not None: request.add_header('If-modified-since', last_mod_date) - # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT - if last_mod_date is not None: - last_mod = dateutil.parser.parse(last_mod_date) - else: - # Never modified - use start of epoch - last_mod = datetime.datetime.utcfromtimestamp(0) - # strip any timezone out (in case they're supported in future) - last_mod = last_mod.replace(tzinfo=None) + # Parse last modified date + last_mod = datestr_to_datetime(last_mod_date) + + # Not Modified and still recent enough to be useful + # Onionoo / Globe used to use 6 hours, but we can afford a day + required_freshness = datetime.datetime.utcnow() + # strip any timezone out (to match dateutil.parser) + required_freshness = required_freshness.replace(tzinfo=None) + required_freshness -= datetime.timedelta(hours=24) + # Make the OnionOO request response_code = 0 try: response = urllib2.urlopen(request) response_code = response.getcode() except urllib2.HTTPError, error: response_code = error.code - # strip any timezone out (to match dateutil.parser) - six_hours_ago = datetime.datetime.utcnow() - six_hours_ago = six_hours_ago.replace(tzinfo=None) - six_hours_ago -= datetime.timedelta(hours=6) - # Not Modified and still recent enough to be useful (Globe uses 6 hours) - if response_code == 304: - if last_mod < six_hours_ago: - raise Exception("Outdated data from " + url + ": " - + str(error.code) + ": " + error.reason) - else: - pass + if response_code == 304: # not modified + pass else: raise Exception("Could not get " + url + ": " + str(error.code) + ": " + error.reason) if response_code == 200: # OK + last_mod = datestr_to_datetime(response.info().get('Last-Modified')) + + # Check for freshness + if last_mod < required_freshness: + if last_mod_date is not None: + date_message = "Outdated data: last updated " + last_mod_date + else: + date_message = "No data: never downloaded " + raise Exception(date_message + " from " + url) + + # Process the data + if response_code == 200: # OK response_json = load_possibly_compressed_response_json(response) |