aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Færøy <ahf@torproject.org>2021-11-02 15:28:56 +0000
committerAlexander Færøy <ahf@torproject.org>2021-11-02 15:28:56 +0000
commitf6600377b495c276cbaf423d9c8907abd4cb3c82 (patch)
tree5b331edebb32463cb98bb2611d172c4753ad1c12
parentb109161c8f1c2acac7eee32ad86cc727975eeb08 (diff)
parent7084ec871070e1f01a48b7735367b94abe1feb21 (diff)
downloadtor-f6600377b495c276cbaf423d9c8907abd4cb3c82.tar.gz
tor-f6600377b495c276cbaf423d9c8907abd4cb3c82.zip
Merge remote-tracking branch 'tor-gitlab/mr/474' into main
-rw-r--r--changes/bug4039610
-rw-r--r--changes/bug404966
-rw-r--r--changes/bug404978
-rw-r--r--doc/man/tor.1.txt11
-rw-r--r--src/feature/client/bridges.c22
-rw-r--r--src/feature/client/bridges.h3
-rw-r--r--src/feature/client/entrynodes.c39
-rw-r--r--src/feature/dirclient/dlstatus.c17
-rw-r--r--src/feature/nodelist/nodelist.c10
-rw-r--r--src/feature/nodelist/routerlist.c9
-rw-r--r--src/test/test_dir.c8
11 files changed, 113 insertions, 30 deletions
diff --git a/changes/bug40396 b/changes/bug40396
new file mode 100644
index 0000000000..f9d5b8ecb2
--- /dev/null
+++ b/changes/bug40396
@@ -0,0 +1,10 @@
+ o Major bugfixes (bridges):
+ - Make Tor work reliably again when you have multiple bridges
+ configured and one or more of them are unreachable. The problem
+ came because we require that we have bridge descriptors for both
+ of our first two bridges (else we refuse to try to connect), but
+ in some cases we would wait three hours before trying to fetch
+ these missing descriptors, and/or never recover when we do try
+ to fetch them. Fixes bugs 40396 and 40495; bugfix on 0.3.0.5-rc
+ and 0.3.2.1-alpha.
+
diff --git a/changes/bug40496 b/changes/bug40496
new file mode 100644
index 0000000000..b626cc51fe
--- /dev/null
+++ b/changes/bug40496
@@ -0,0 +1,6 @@
+ o Minor bugfixes (logging):
+ - When we no longer have enough directory information to use the
+ network, we would log a notice-level message -- but we would not
+ reliably log a message when we recovered and resumed using the
+ network. Now make sure there is always a corresponding message
+ about recovering. Fixes bug 40496; bugfix on 0.3.5.1-alpha.
diff --git a/changes/bug40497 b/changes/bug40497
new file mode 100644
index 0000000000..d3004d0b72
--- /dev/null
+++ b/changes/bug40497
@@ -0,0 +1,8 @@
+ o Minor bugfixes (bridges):
+ - When we don't yet have a descriptor for one of our bridges, disable
+ the entry guard retry schedule on that bridge. The entry guard retry
+ schedule and the bridge descriptor retry schedule can conflict,
+ e.g. where we mark a bridge as "maybe up" yet we don't try to fetch
+ its descriptor yet, leading Tor to wait (refusing to do anything)
+ until it becomes time to fetch the descriptor. Fixes bug 40497;
+ bugfix on 0.3.0.3-alpha.
diff --git a/doc/man/tor.1.txt b/doc/man/tor.1.txt
index a3ff9e1830..5627b4f01f 100644
--- a/doc/man/tor.1.txt
+++ b/doc/man/tor.1.txt
@@ -3576,14 +3576,15 @@ The following options are used for running a testing Tor network.
[[TestingAuthKeySlop]] **TestingAuthKeySlop** __N__ **seconds**|**minutes**|**hours** +
[[TestingBridgeBootstrapDownloadInitialDelay]] **TestingBridgeBootstrapDownloadInitialDelay** __N__::
- Initial delay in seconds for when clients should download each bridge descriptor when they
- have just started, or when they can not contact any of their bridges.
+ Initial delay in seconds for how long clients should wait before
+ downloading a bridge descriptor for a new bridge.
Changing this requires that **TestingTorNetwork** is set. (Default: 0)
[[TestingBridgeDownloadInitialDelay]] **TestingBridgeDownloadInitialDelay** __N__::
- Initial delay in seconds for when clients should download each bridge descriptor when they
- know that one or more of their configured bridges are running. Changing
- this requires that **TestingTorNetwork** is set. (Default: 10800)
+ How long to wait (in seconds) once clients have successfully
+ downloaded a bridge descriptor, before trying another download for
+ that same bridge. Changing this requires that **TestingTorNetwork**
+ is set. (Default: 10800)
[[TestingClientConsensusDownloadInitialDelay]] **TestingClientConsensusDownloadInitialDelay** __N__::
Initial delay in seconds for when clients should download consensuses. Changing this
diff --git a/src/feature/client/bridges.c b/src/feature/client/bridges.c
index d40bcc6c8e..9e36d26929 100644
--- a/src/feature/client/bridges.c
+++ b/src/feature/client/bridges.c
@@ -943,9 +943,17 @@ rewrite_node_address_for_bridge(const bridge_info_t *bridge, node_t *node)
}
/** We just learned a descriptor for a bridge. See if that
- * digest is in our entry guard list, and add it if not. */
+ * digest is in our entry guard list, and add it if not. Schedule the
+ * next fetch for a long time from now, and initiate any follow-up
+ * activities like continuing to bootstrap.
+ *
+ * <b>from_cache</b> * tells us whether we fetched it from disk (else
+ * the network)
+ *
+ * <b>desc_is_new</b> tells us if we preferred it to the old version we
+ * had, if any. */
void
-learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
+learned_bridge_descriptor(routerinfo_t *ri, int from_cache, int desc_is_new)
{
tor_assert(ri);
tor_assert(ri->purpose == ROUTER_PURPOSE_BRIDGE);
@@ -961,12 +969,14 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
if (bridge) { /* if we actually want to use this one */
node_t *node;
- /* it's here; schedule its re-fetch for a long time from now. */
if (!from_cache) {
/* This schedules the re-fetch at a constant interval, which produces
* a pattern of bridge traffic. But it's better than trying all
* configured bridges several times in the first few minutes. */
download_status_reset(&bridge->fetch_status);
+ /* it's here; schedule its re-fetch for a long time from now. */
+ bridge->fetch_status.next_attempt_at +=
+ get_options()->TestingBridgeDownloadInitialDelay;
}
node = node_get_mutable_by_id(ri->cache_info.identity_digest);
@@ -982,8 +992,10 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache)
entry_guard_learned_bridge_identity(&bridge->addrport_configured,
(const uint8_t*)ri->cache_info.identity_digest);
- log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s", ri->nickname,
- from_cache ? "cached" : "fresh", router_describe(ri));
+ if (desc_is_new)
+ log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s",
+ ri->nickname,
+ from_cache ? "cached" : "fresh", router_describe(ri));
/* If we didn't have a reachable bridge before this one, try directory
* documents again. */
if (first) {
diff --git a/src/feature/client/bridges.h b/src/feature/client/bridges.h
index a42363f683..dd3e498a0a 100644
--- a/src/feature/client/bridges.h
+++ b/src/feature/client/bridges.h
@@ -46,7 +46,8 @@ void learned_router_identity(const tor_addr_t *addr, uint16_t port,
void bridge_add_from_config(struct bridge_line_t *bridge_line);
void retry_bridge_descriptor_fetch_directly(const char *digest);
void fetch_bridge_descriptors(const or_options_t *options, time_t now);
-void learned_bridge_descriptor(routerinfo_t *ri, int from_cache);
+void learned_bridge_descriptor(routerinfo_t *ri,
+ int from_cache, int desc_is_new);
const smartlist_t *get_socks_args_by_bridge_addrport(const tor_addr_t *addr,
uint16_t port);
diff --git a/src/feature/client/entrynodes.c b/src/feature/client/entrynodes.c
index 315bd54674..32ecb4f705 100644
--- a/src/feature/client/entrynodes.c
+++ b/src/feature/client/entrynodes.c
@@ -132,6 +132,7 @@
#include "feature/client/entrynodes.h"
#include "feature/client/transports.h"
#include "feature/control/control_events.h"
+#include "feature/dirclient/dlstatus.h"
#include "feature/dircommon/directory.h"
#include "feature/nodelist/describe.h"
#include "feature/nodelist/microdesc.h"
@@ -576,6 +577,18 @@ mark_guard_maybe_reachable(entry_guard_t *guard)
guard->is_reachable = GUARD_REACHABLE_MAYBE;
if (guard->is_filtered_guard)
guard->is_usable_filtered_guard = 1;
+
+ /* Check if it is a bridge and we don't have its descriptor yet */
+ if (guard->bridge_addr && !guard_has_descriptor(guard)) {
+ /* Reset the descriptor fetch retry schedule, so it gives it another
+ * go soon. It's important to keep any "REACHABLE_MAYBE" bridges in
+ * sync with the descriptor fetch schedule, since we will refuse to
+ * use the network until our first primary bridges are either
+ * known-usable or known-unusable. See bug 40396. */
+ download_status_t *dl = get_bridge_dl_status_by_id(guard->identity);
+ if (dl)
+ download_status_reset(dl);
+ }
}
/**
@@ -2046,6 +2059,14 @@ entry_guard_consider_retry(entry_guard_t *guard)
get_retry_schedule(guard->failing_since, now, guard->is_primary);
const time_t last_attempt = guard->last_tried_to_connect;
+ /* Check if it is a bridge and we don't have its descriptor yet */
+ if (guard->bridge_addr && !guard_has_descriptor(guard)) {
+ /* We want to leave the retry schedule to fetch_bridge_descriptors(),
+ * so we don't have two retry schedules clobbering each other. See
+ * bugs 40396 and 40497 for details of why we need this exception. */
+ return;
+ }
+
if (BUG(last_attempt == 0) ||
now >= last_attempt + delay) {
/* We should mark this retriable. */
@@ -2271,6 +2292,13 @@ entry_guards_note_guard_failure(guard_selection_t *gs,
guard->is_primary?"primary ":"",
guard->confirmed_idx>=0?"confirmed ":"",
entry_guard_describe(guard));
+
+ /* Schedule a re-assessment of whether we have enough dir info to
+ * use the network. Counterintuitively, *losing* a bridge might actually
+ * be just what we need to *resume* using the network, if we had it in
+ * state GUARD_REACHABLE_MAYBE and we were stalling to learn this
+ * outcome. See bug 40396 for more details. */
+ router_dir_info_changed();
}
/**
@@ -2295,6 +2323,12 @@ entry_guards_note_guard_success(guard_selection_t *gs,
/* If guard was not already marked as reachable, send a GUARD UP signal */
if (guard->is_reachable != GUARD_REACHABLE_YES) {
control_event_guard(guard->nickname, guard->identity, "UP");
+
+ /* Schedule a re-assessment of whether we have enough dir info to
+ * use the network. One of our guards has just moved to
+ * GUARD_REACHABLE_YES, so maybe we can resume using the network
+ * now. */
+ router_dir_info_changed();
}
guard->is_reachable = GUARD_REACHABLE_YES;
@@ -3538,6 +3572,11 @@ entry_guards_changed_for_guard_selection(guard_selection_t *gs)
entry_guards_update_guards_in_state()
*/
or_state_mark_dirty(get_or_state(), when);
+
+ /* Schedule a re-assessment of whether we have enough dir info to
+ * use the network. When we add or remove or disable or enable a
+ * guard, the decision could shift. */
+ router_dir_info_changed();
}
/** Our list of entry guards has changed for the default guard selection
diff --git a/src/feature/dirclient/dlstatus.c b/src/feature/dirclient/dlstatus.c
index 8be2983a5d..c21dd113b4 100644
--- a/src/feature/dirclient/dlstatus.c
+++ b/src/feature/dirclient/dlstatus.c
@@ -73,15 +73,14 @@ find_dl_min_delay(const download_status_t *dls, const or_options_t *options)
}
}
case DL_SCHED_BRIDGE:
- if (options->UseBridges && num_bridges_usable(0) > 0) {
- /* A bridge client that is sure that one or more of its bridges are
- * running can afford to wait longer to update bridge descriptors. */
- return options->TestingBridgeDownloadInitialDelay;
- } else {
- /* A bridge client which might have no running bridges, must try to
- * get bridge descriptors straight away. */
- return options->TestingBridgeBootstrapDownloadInitialDelay;
- }
+ /* Be conservative here: always return the 'during bootstrap' delay
+ * value, so we never delay while trying to fetch descriptors
+ * for new bridges. Once we do succeed at fetching a descriptor
+ * for our bridge, we will adjust its next_attempt_at based on
+ * the longer "TestingBridgeDownloadInitialDelay" value. See
+ * learned_bridge_descriptor() for details.
+ */
+ return options->TestingBridgeBootstrapDownloadInitialDelay;
default:
tor_assert(0);
}
diff --git a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c
index 121dc8823a..c676e8dfb4 100644
--- a/src/feature/nodelist/nodelist.c
+++ b/src/feature/nodelist/nodelist.c
@@ -2820,6 +2820,7 @@ update_router_have_minimum_dir_info(void)
const networkstatus_t *consensus =
networkstatus_get_reasonably_live_consensus(now,usable_consensus_flavor());
int using_md;
+ static int be_loud_when_things_work_again = 0;
if (!consensus) {
if (!networkstatus_get_latest_consensus())
@@ -2875,8 +2876,9 @@ update_router_have_minimum_dir_info(void)
if (res && !have_min_dir_info) {
control_event_client_status(LOG_NOTICE, "ENOUGH_DIR_INFO");
control_event_boot_dir(BOOTSTRAP_STATUS_ENOUGH_DIRINFO, 0);
- log_info(LD_DIR,
- "We now have enough directory information to build circuits.");
+ tor_log(be_loud_when_things_work_again ? LOG_NOTICE : LOG_INFO, LD_DIR,
+ "We now have enough directory information to build circuits.");
+ be_loud_when_things_work_again = 0;
}
/* If paths have just become unavailable in this update. */
@@ -2885,6 +2887,10 @@ update_router_have_minimum_dir_info(void)
tor_log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR,
"Our directory information is no longer up-to-date "
"enough to build circuits: %s", dir_info_status);
+ if (!quiet) {
+ /* remember to do a notice-level log when things come back */
+ be_loud_when_things_work_again = 1;
+ }
/* a) make us log when we next complete a circuit, so we know when Tor
* is back up and usable, and b) disable some activities that Tor
diff --git a/src/feature/nodelist/routerlist.c b/src/feature/nodelist/routerlist.c
index fbf9e026c2..c00f7ffb26 100644
--- a/src/feature/nodelist/routerlist.c
+++ b/src/feature/nodelist/routerlist.c
@@ -1617,6 +1617,13 @@ router_add_to_routerlist(routerinfo_t *router, const char **msg,
"descriptor for router %s",
router_describe(router));
} else {
+ if (router->purpose == ROUTER_PURPOSE_BRIDGE) {
+ /* Even if we're not going to keep this descriptor, we need to
+ * let the bridge descriptor fetch subsystem know that we
+ * succeeded at getting it -- so we can adjust the retry schedule
+ * to stop trying for a while. */
+ learned_bridge_descriptor(router, from_cache, 0);
+ }
log_info(LD_DIR,
"Dropping descriptor that we already have for router %s",
router_describe(router));
@@ -2047,7 +2054,7 @@ routerlist_descriptors_added(smartlist_t *sl, int from_cache)
control_event_descriptors_changed(sl);
SMARTLIST_FOREACH_BEGIN(sl, routerinfo_t *, ri) {
if (ri->purpose == ROUTER_PURPOSE_BRIDGE)
- learned_bridge_descriptor(ri, from_cache);
+ learned_bridge_descriptor(ri, from_cache, 1);
if (ri->needs_retest_if_added) {
ri->needs_retest_if_added = 0;
dirserv_single_reachability_test(approx_time(), ri);
diff --git a/src/test/test_dir.c b/src/test/test_dir.c
index 0d2d6800ba..186e09f236 100644
--- a/src/test/test_dir.c
+++ b/src/test/test_dir.c
@@ -6652,13 +6652,7 @@ test_dir_find_dl_min_delay(void* data)
dls.schedule = DL_SCHED_BRIDGE;
/* client */
- mock_options->ClientOnly = 1;
- mock_options->UseBridges = 1;
- if (num_bridges_usable(0) > 0) {
- tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge);
- } else {
- tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap);
- }
+ tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap);
done:
UNMOCK(networkstatus_consensus_is_bootstrapping);