diff options
-rw-r--r-- | changes/bug40396 | 10 | ||||
-rw-r--r-- | changes/bug40496 | 6 | ||||
-rw-r--r-- | changes/bug40497 | 8 | ||||
-rw-r--r-- | doc/man/tor.1.txt | 11 | ||||
-rw-r--r-- | src/feature/client/bridges.c | 22 | ||||
-rw-r--r-- | src/feature/client/bridges.h | 3 | ||||
-rw-r--r-- | src/feature/client/entrynodes.c | 39 | ||||
-rw-r--r-- | src/feature/dirclient/dlstatus.c | 17 | ||||
-rw-r--r-- | src/feature/nodelist/nodelist.c | 10 | ||||
-rw-r--r-- | src/feature/nodelist/routerlist.c | 9 | ||||
-rw-r--r-- | src/test/test_dir.c | 8 |
11 files changed, 113 insertions, 30 deletions
diff --git a/changes/bug40396 b/changes/bug40396 new file mode 100644 index 0000000000..f9d5b8ecb2 --- /dev/null +++ b/changes/bug40396 @@ -0,0 +1,10 @@ + o Major bugfixes (bridges): + - Make Tor work reliably again when you have multiple bridges + configured and one or more of them are unreachable. The problem + came because we require that we have bridge descriptors for both + of our first two bridges (else we refuse to try to connect), but + in some cases we would wait three hours before trying to fetch + these missing descriptors, and/or never recover when we do try + to fetch them. Fixes bugs 40396 and 40495; bugfix on 0.3.0.5-rc + and 0.3.2.1-alpha. + diff --git a/changes/bug40496 b/changes/bug40496 new file mode 100644 index 0000000000..b626cc51fe --- /dev/null +++ b/changes/bug40496 @@ -0,0 +1,6 @@ + o Minor bugfixes (logging): + - When we no longer have enough directory information to use the + network, we would log a notice-level message -- but we would not + reliably log a message when we recovered and resumed using the + network. Now make sure there is always a corresponding message + about recovering. Fixes bug 40496; bugfix on 0.3.5.1-alpha. diff --git a/changes/bug40497 b/changes/bug40497 new file mode 100644 index 0000000000..d3004d0b72 --- /dev/null +++ b/changes/bug40497 @@ -0,0 +1,8 @@ + o Minor bugfixes (bridges): + - When we don't yet have a descriptor for one of our bridges, disable + the entry guard retry schedule on that bridge. The entry guard retry + schedule and the bridge descriptor retry schedule can conflict, + e.g. where we mark a bridge as "maybe up" yet we don't try to fetch + its descriptor yet, leading Tor to wait (refusing to do anything) + until it becomes time to fetch the descriptor. Fixes bug 40497; + bugfix on 0.3.0.3-alpha. diff --git a/doc/man/tor.1.txt b/doc/man/tor.1.txt index a3ff9e1830..5627b4f01f 100644 --- a/doc/man/tor.1.txt +++ b/doc/man/tor.1.txt @@ -3576,14 +3576,15 @@ The following options are used for running a testing Tor network. [[TestingAuthKeySlop]] **TestingAuthKeySlop** __N__ **seconds**|**minutes**|**hours** + [[TestingBridgeBootstrapDownloadInitialDelay]] **TestingBridgeBootstrapDownloadInitialDelay** __N__:: - Initial delay in seconds for when clients should download each bridge descriptor when they - have just started, or when they can not contact any of their bridges. + Initial delay in seconds for how long clients should wait before + downloading a bridge descriptor for a new bridge. Changing this requires that **TestingTorNetwork** is set. (Default: 0) [[TestingBridgeDownloadInitialDelay]] **TestingBridgeDownloadInitialDelay** __N__:: - Initial delay in seconds for when clients should download each bridge descriptor when they - know that one or more of their configured bridges are running. Changing - this requires that **TestingTorNetwork** is set. (Default: 10800) + How long to wait (in seconds) once clients have successfully + downloaded a bridge descriptor, before trying another download for + that same bridge. Changing this requires that **TestingTorNetwork** + is set. (Default: 10800) [[TestingClientConsensusDownloadInitialDelay]] **TestingClientConsensusDownloadInitialDelay** __N__:: Initial delay in seconds for when clients should download consensuses. Changing this diff --git a/src/feature/client/bridges.c b/src/feature/client/bridges.c index d40bcc6c8e..9e36d26929 100644 --- a/src/feature/client/bridges.c +++ b/src/feature/client/bridges.c @@ -943,9 +943,17 @@ rewrite_node_address_for_bridge(const bridge_info_t *bridge, node_t *node) } /** We just learned a descriptor for a bridge. See if that - * digest is in our entry guard list, and add it if not. */ + * digest is in our entry guard list, and add it if not. Schedule the + * next fetch for a long time from now, and initiate any follow-up + * activities like continuing to bootstrap. + * + * <b>from_cache</b> * tells us whether we fetched it from disk (else + * the network) + * + * <b>desc_is_new</b> tells us if we preferred it to the old version we + * had, if any. */ void -learned_bridge_descriptor(routerinfo_t *ri, int from_cache) +learned_bridge_descriptor(routerinfo_t *ri, int from_cache, int desc_is_new) { tor_assert(ri); tor_assert(ri->purpose == ROUTER_PURPOSE_BRIDGE); @@ -961,12 +969,14 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache) if (bridge) { /* if we actually want to use this one */ node_t *node; - /* it's here; schedule its re-fetch for a long time from now. */ if (!from_cache) { /* This schedules the re-fetch at a constant interval, which produces * a pattern of bridge traffic. But it's better than trying all * configured bridges several times in the first few minutes. */ download_status_reset(&bridge->fetch_status); + /* it's here; schedule its re-fetch for a long time from now. */ + bridge->fetch_status.next_attempt_at += + get_options()->TestingBridgeDownloadInitialDelay; } node = node_get_mutable_by_id(ri->cache_info.identity_digest); @@ -982,8 +992,10 @@ learned_bridge_descriptor(routerinfo_t *ri, int from_cache) entry_guard_learned_bridge_identity(&bridge->addrport_configured, (const uint8_t*)ri->cache_info.identity_digest); - log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s", ri->nickname, - from_cache ? "cached" : "fresh", router_describe(ri)); + if (desc_is_new) + log_notice(LD_DIR, "new bridge descriptor '%s' (%s): %s", + ri->nickname, + from_cache ? "cached" : "fresh", router_describe(ri)); /* If we didn't have a reachable bridge before this one, try directory * documents again. */ if (first) { diff --git a/src/feature/client/bridges.h b/src/feature/client/bridges.h index a42363f683..dd3e498a0a 100644 --- a/src/feature/client/bridges.h +++ b/src/feature/client/bridges.h @@ -46,7 +46,8 @@ void learned_router_identity(const tor_addr_t *addr, uint16_t port, void bridge_add_from_config(struct bridge_line_t *bridge_line); void retry_bridge_descriptor_fetch_directly(const char *digest); void fetch_bridge_descriptors(const or_options_t *options, time_t now); -void learned_bridge_descriptor(routerinfo_t *ri, int from_cache); +void learned_bridge_descriptor(routerinfo_t *ri, + int from_cache, int desc_is_new); const smartlist_t *get_socks_args_by_bridge_addrport(const tor_addr_t *addr, uint16_t port); diff --git a/src/feature/client/entrynodes.c b/src/feature/client/entrynodes.c index 315bd54674..32ecb4f705 100644 --- a/src/feature/client/entrynodes.c +++ b/src/feature/client/entrynodes.c @@ -132,6 +132,7 @@ #include "feature/client/entrynodes.h" #include "feature/client/transports.h" #include "feature/control/control_events.h" +#include "feature/dirclient/dlstatus.h" #include "feature/dircommon/directory.h" #include "feature/nodelist/describe.h" #include "feature/nodelist/microdesc.h" @@ -576,6 +577,18 @@ mark_guard_maybe_reachable(entry_guard_t *guard) guard->is_reachable = GUARD_REACHABLE_MAYBE; if (guard->is_filtered_guard) guard->is_usable_filtered_guard = 1; + + /* Check if it is a bridge and we don't have its descriptor yet */ + if (guard->bridge_addr && !guard_has_descriptor(guard)) { + /* Reset the descriptor fetch retry schedule, so it gives it another + * go soon. It's important to keep any "REACHABLE_MAYBE" bridges in + * sync with the descriptor fetch schedule, since we will refuse to + * use the network until our first primary bridges are either + * known-usable or known-unusable. See bug 40396. */ + download_status_t *dl = get_bridge_dl_status_by_id(guard->identity); + if (dl) + download_status_reset(dl); + } } /** @@ -2046,6 +2059,14 @@ entry_guard_consider_retry(entry_guard_t *guard) get_retry_schedule(guard->failing_since, now, guard->is_primary); const time_t last_attempt = guard->last_tried_to_connect; + /* Check if it is a bridge and we don't have its descriptor yet */ + if (guard->bridge_addr && !guard_has_descriptor(guard)) { + /* We want to leave the retry schedule to fetch_bridge_descriptors(), + * so we don't have two retry schedules clobbering each other. See + * bugs 40396 and 40497 for details of why we need this exception. */ + return; + } + if (BUG(last_attempt == 0) || now >= last_attempt + delay) { /* We should mark this retriable. */ @@ -2271,6 +2292,13 @@ entry_guards_note_guard_failure(guard_selection_t *gs, guard->is_primary?"primary ":"", guard->confirmed_idx>=0?"confirmed ":"", entry_guard_describe(guard)); + + /* Schedule a re-assessment of whether we have enough dir info to + * use the network. Counterintuitively, *losing* a bridge might actually + * be just what we need to *resume* using the network, if we had it in + * state GUARD_REACHABLE_MAYBE and we were stalling to learn this + * outcome. See bug 40396 for more details. */ + router_dir_info_changed(); } /** @@ -2295,6 +2323,12 @@ entry_guards_note_guard_success(guard_selection_t *gs, /* If guard was not already marked as reachable, send a GUARD UP signal */ if (guard->is_reachable != GUARD_REACHABLE_YES) { control_event_guard(guard->nickname, guard->identity, "UP"); + + /* Schedule a re-assessment of whether we have enough dir info to + * use the network. One of our guards has just moved to + * GUARD_REACHABLE_YES, so maybe we can resume using the network + * now. */ + router_dir_info_changed(); } guard->is_reachable = GUARD_REACHABLE_YES; @@ -3538,6 +3572,11 @@ entry_guards_changed_for_guard_selection(guard_selection_t *gs) entry_guards_update_guards_in_state() */ or_state_mark_dirty(get_or_state(), when); + + /* Schedule a re-assessment of whether we have enough dir info to + * use the network. When we add or remove or disable or enable a + * guard, the decision could shift. */ + router_dir_info_changed(); } /** Our list of entry guards has changed for the default guard selection diff --git a/src/feature/dirclient/dlstatus.c b/src/feature/dirclient/dlstatus.c index 8be2983a5d..c21dd113b4 100644 --- a/src/feature/dirclient/dlstatus.c +++ b/src/feature/dirclient/dlstatus.c @@ -73,15 +73,14 @@ find_dl_min_delay(const download_status_t *dls, const or_options_t *options) } } case DL_SCHED_BRIDGE: - if (options->UseBridges && num_bridges_usable(0) > 0) { - /* A bridge client that is sure that one or more of its bridges are - * running can afford to wait longer to update bridge descriptors. */ - return options->TestingBridgeDownloadInitialDelay; - } else { - /* A bridge client which might have no running bridges, must try to - * get bridge descriptors straight away. */ - return options->TestingBridgeBootstrapDownloadInitialDelay; - } + /* Be conservative here: always return the 'during bootstrap' delay + * value, so we never delay while trying to fetch descriptors + * for new bridges. Once we do succeed at fetching a descriptor + * for our bridge, we will adjust its next_attempt_at based on + * the longer "TestingBridgeDownloadInitialDelay" value. See + * learned_bridge_descriptor() for details. + */ + return options->TestingBridgeBootstrapDownloadInitialDelay; default: tor_assert(0); } diff --git a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c index 121dc8823a..c676e8dfb4 100644 --- a/src/feature/nodelist/nodelist.c +++ b/src/feature/nodelist/nodelist.c @@ -2820,6 +2820,7 @@ update_router_have_minimum_dir_info(void) const networkstatus_t *consensus = networkstatus_get_reasonably_live_consensus(now,usable_consensus_flavor()); int using_md; + static int be_loud_when_things_work_again = 0; if (!consensus) { if (!networkstatus_get_latest_consensus()) @@ -2875,8 +2876,9 @@ update_router_have_minimum_dir_info(void) if (res && !have_min_dir_info) { control_event_client_status(LOG_NOTICE, "ENOUGH_DIR_INFO"); control_event_boot_dir(BOOTSTRAP_STATUS_ENOUGH_DIRINFO, 0); - log_info(LD_DIR, - "We now have enough directory information to build circuits."); + tor_log(be_loud_when_things_work_again ? LOG_NOTICE : LOG_INFO, LD_DIR, + "We now have enough directory information to build circuits."); + be_loud_when_things_work_again = 0; } /* If paths have just become unavailable in this update. */ @@ -2885,6 +2887,10 @@ update_router_have_minimum_dir_info(void) tor_log(quiet ? LOG_INFO : LOG_NOTICE, LD_DIR, "Our directory information is no longer up-to-date " "enough to build circuits: %s", dir_info_status); + if (!quiet) { + /* remember to do a notice-level log when things come back */ + be_loud_when_things_work_again = 1; + } /* a) make us log when we next complete a circuit, so we know when Tor * is back up and usable, and b) disable some activities that Tor diff --git a/src/feature/nodelist/routerlist.c b/src/feature/nodelist/routerlist.c index fbf9e026c2..c00f7ffb26 100644 --- a/src/feature/nodelist/routerlist.c +++ b/src/feature/nodelist/routerlist.c @@ -1617,6 +1617,13 @@ router_add_to_routerlist(routerinfo_t *router, const char **msg, "descriptor for router %s", router_describe(router)); } else { + if (router->purpose == ROUTER_PURPOSE_BRIDGE) { + /* Even if we're not going to keep this descriptor, we need to + * let the bridge descriptor fetch subsystem know that we + * succeeded at getting it -- so we can adjust the retry schedule + * to stop trying for a while. */ + learned_bridge_descriptor(router, from_cache, 0); + } log_info(LD_DIR, "Dropping descriptor that we already have for router %s", router_describe(router)); @@ -2047,7 +2054,7 @@ routerlist_descriptors_added(smartlist_t *sl, int from_cache) control_event_descriptors_changed(sl); SMARTLIST_FOREACH_BEGIN(sl, routerinfo_t *, ri) { if (ri->purpose == ROUTER_PURPOSE_BRIDGE) - learned_bridge_descriptor(ri, from_cache); + learned_bridge_descriptor(ri, from_cache, 1); if (ri->needs_retest_if_added) { ri->needs_retest_if_added = 0; dirserv_single_reachability_test(approx_time(), ri); diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 0d2d6800ba..186e09f236 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -6652,13 +6652,7 @@ test_dir_find_dl_min_delay(void* data) dls.schedule = DL_SCHED_BRIDGE; /* client */ - mock_options->ClientOnly = 1; - mock_options->UseBridges = 1; - if (num_bridges_usable(0) > 0) { - tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge); - } else { - tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap); - } + tt_int_op(find_dl_min_delay(&dls, mock_options), OP_EQ, bridge_bootstrap); done: UNMOCK(networkstatus_consensus_is_bootstrapping); |