summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author teor <teor2345@gmail.com> 2016-12-10 21:35:23 +1100
committer teor <teor2345@gmail.com> 2016-12-19 15:06:00 +1100
commit 654367f0260ff144898dc0ac2a45b710e7350f29 (patch)
tree b7ffec4303ebae9f0ea9588ae9cdcaeda4292be6
parent 2d2bbaf259f5a886ad858f2c5fc75c72e0369f5b (diff)
downloadtor-654367f0260ff144898dc0ac2a45b710e7350f29.tar.gz
tor-654367f0260ff144898dc0ac2a45b710e7350f29.zip
Allow fallbacks serving consensuses that expired less than 24 hours ago
This works around #20909, where relays serve stale consensuses for a short time, and then recover. Update to the fix for #20539.
-rw-r--r-- changes/fallbacks-201612 | 3
-rwxr-xr-x scripts/maint/updateFallbackDirs.py | 35
2 files changed, 29 insertions, 9 deletions
diff --git a/changes/fallbacks-201612 b/changes/fallbacks-201612
index 5bb7bddc4a..03d9843949 100644
--- a/changes/fallbacks-201612
+++ b/changes/fallbacks-201612
@@ -13,7 +13,8 @@
- Make it easier to change the output sort order of fallbacks.
Closes ticket 20822.
- Exclude relays affected by 20499 from the fallback list. Exclude known
- affected versions, and any relay that delivers a stale consensus.
+ affected versions, and any relay that delivers a stale consensus, as
+ long as that consensus expired more than 24 hours ago.
Closes ticket 20539.
- Require fallbacks to have flags for 90% of the time (weighted decaying
average), rather than 95%. This allows at least 73% of clients to
diff --git a/scripts/maint/updateFallbackDirs.py b/scripts/maint/updateFallbackDirs.py
index 41d210f29e..4f60fd4223 100755
--- a/scripts/maint/updateFallbackDirs.py
+++ b/scripts/maint/updateFallbackDirs.py
@@ -88,6 +88,19 @@ MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS
# Clients have been using microdesc consensuses by default for a while now
DOWNLOAD_MICRODESC_CONSENSUS = True
+# If a relay delivers an expired consensus that expired less than this many
+# seconds ago, we still allow the relay. This should never be less than
+# -90*60, as all directory mirrors should have downloaded a consensus 90
+# minutes before it expires. It should never be more than 24 hours, because
+# clients reject consensuses that are older than REASONABLY_LIVE_TIME.
+# For the consensus expiry check to be accurate, the machine running this
+# script needs an accurate clock.
+# We use 24 hours to compensate for #20909, where relays on 0.2.9.5-alpha and
+# 0.3.0.0-alpha-dev and later deliver stale consensuses, but typically recover
+# after ~12 hours.
+# We should make this lower when #20909 is fixed, see #20942.
+CONSENSUS_EXPIRY_TOLERANCE = 24*60*60
+
# Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
@@ -1146,7 +1159,6 @@ class Candidate(object):
def fallback_consensus_download_speed(dirip, dirport, nickname, fingerprint,
max_time):
download_failed = False
- start = datetime.datetime.utcnow()
# some directory mirrors respond to requests in ways that hang python
# sockets, which is why we log this line here
logging.info('Initiating %sconsensus download from %s (%s:%d) %s.',
@@ -1155,6 +1167,7 @@ class Candidate(object):
# there appears to be about 1 second of overhead when comparing stem's
# internal trace time and the elapsed time calculated here
TIMEOUT_SLOP = 1.0
+ start = datetime.datetime.utcnow()
try:
consensus = get_consensus(
endpoints = [(dirip, dirport)],
@@ -1165,26 +1178,32 @@ class Candidate(object):
document_handler = DocumentHandler.BARE_DOCUMENT,
microdescriptor = DOWNLOAD_MICRODESC_CONSENSUS
).run()[0]
+ end = datetime.datetime.utcnow()
+ time_since_expiry = (end - consensus.valid_until).total_seconds()
except Exception, stem_error:
+ end = datetime.datetime.utcnow()
logging.info('Unable to retrieve a consensus from %s: %s', nickname,
stem_error)
status = 'error: "%s"' % (stem_error)
level = logging.WARNING
download_failed = True
- elapsed = (datetime.datetime.utcnow() - start).total_seconds()
+ elapsed = (end - start).total_seconds()
if download_failed:
- # keep the error failure status
+ # keep the error status; consensus and time_since_expiry are unset here
pass
elif elapsed > max_time:
status = 'too slow'
level = logging.WARNING
download_failed = True
- elif datetime.datetime.utcnow() > consensus.valid_until:
- time_since_expiry = (datetime.datetime.utcnow() -
- consensus.valid_until).total_seconds()
+ elif (time_since_expiry > 0):
status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry))
- level = logging.WARNING
- download_failed = True
+ if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE:
+ status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE)
+ level = logging.INFO
+ else:
+ status += ', invalid'
+ level = logging.WARNING
+ download_failed = True
else:
status = 'ok'
level = logging.DEBUG