From 977e396e866052a20de73f4e8121e514d4eff774 Mon Sep 17 00:00:00 2001 From: Roger Dingledine Date: Fri, 11 Mar 2011 22:12:15 -0500 Subject: improve accuracy for when a relay went unreachable --- src/or/dirserv.c | 13 +++++++++---- src/or/dirserv.h | 8 ++++++++ src/or/main.c | 5 +++-- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/or/dirserv.c b/src/or/dirserv.c index aeeab45383..73869273a3 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -970,7 +970,11 @@ dirserv_set_router_is_running(routerinfo_t *router, time_t now) if (!answer && running_long_enough_to_decide_unreachable()) { /* not considered reachable. tell rephist. */ - rep_hist_note_router_unreachable(router->cache_info.identity_digest, now); + time_t when = now; + if (router->last_reachable && + router->last_reachable + REACHABILITY_TEST_PERIOD < now) + when = router->last_reachable + REACHABILITY_TEST_PERIOD; + rep_hist_note_router_unreachable(router->cache_info.identity_digest, when); } router->is_running = answer; @@ -3187,7 +3191,8 @@ dirserv_single_reachability_test(time_t now, routerinfo_t *router) * try a few connections per call. * * The load balancing is such that if we get called once every ten - * seconds, we will cycle through all the tests in 1280 seconds (a + * seconds, we will cycle through all the tests in + * 10*REACHABILITY_MASK_PER_TEST seconds (a * bit over 20 minutes). */ void @@ -3214,11 +3219,11 @@ dirserv_test_reachability(time_t now) continue; /* bridge authorities only test reachability on bridges */ // if (router->cache_info.published_on > cutoff) // continue; - if ((((uint8_t)id_digest[0]) % 128) == ctr) { + if ((((uint8_t)id_digest[0]) % REACHABILITY_MODULO_PER_TEST) == ctr) { dirserv_single_reachability_test(now, router); } } SMARTLIST_FOREACH_END(router); - ctr = (ctr + 1) % 128; /* increment ctr */ + ctr = (ctr + 1) % REACHABILITY_MODULO_PER_TEST; /* increment ctr */ } /** Given a fingerprint fp which is either set if we're looking for a diff --git a/src/or/dirserv.h b/src/or/dirserv.h index 56ad7a6a56..949482ba73 100644 --- a/src/or/dirserv.h +++ b/src/or/dirserv.h @@ -12,6 +12,14 @@ #ifndef _TOR_DIRSERV_H #define _TOR_DIRSERV_H +/** What fraction (1 over this number) of the relay ID space do we + * (as a directory authority) launch connections to at each reachability + * test? */ +#define REACHABILITY_MODULO_PER_TEST 128 + +/** How many seconds apart are the reachability tests for a given relay? */ +#define REACHABILITY_TEST_PERIOD (10*REACHABILITY_MODULO_PER_TEST) + /** Maximum length of an exit policy summary. */ #define MAX_EXITPOLICY_SUMMARY_LEN 1000 diff --git a/src/or/main.c b/src/or/main.c index 979a2bec5c..558608ec8c 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -962,8 +962,9 @@ run_scheduled_events(time_t now) if (accounting_is_enabled(options)) accounting_run_housekeeping(now); - if (now % 10 == 0 && (authdir_mode_tests_reachability(options)) && - !we_are_hibernating()) { + if (now % REACHABILITY_TEST_PERIOD/REACHABILITY_MODULO_PER_TEST == 0 && + (authdir_mode_tests_reachability(options)) && + !we_are_hibernating()) { /* try to determine reachability of the other Tor relays */ dirserv_test_reachability(now); } -- cgit v1.2.3-54-g00ecf From 176fde505fde38afa56ddce04606118b9080546e Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Sat, 12 Mar 2011 00:19:52 -0500 Subject: Tweak bug2716 patch a little Name the magic value "10" rather than re-deriving it. Comment more. Use the pattern that works for periodic timers, not the pattern that doesn't work. ;) --- src/or/dirserv.c | 12 +++++++++--- src/or/dirserv.h | 6 +++++- src/or/main.c | 4 +++- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 73869273a3..b68e005531 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -969,11 +969,17 @@ dirserv_set_router_is_running(routerinfo_t *router, time_t now) } if (!answer && running_long_enough_to_decide_unreachable()) { - /* not considered reachable. tell rephist. */ + /* Not considered reachable. tell rephist about that. + + Because we launch a reachability test for each router every + REACHABILITY_TEST_CYCLE_PERIOD seconds, then the router has probably + been down since at least that time after we last successfully reached + it. + */ time_t when = now; if (router->last_reachable && - router->last_reachable + REACHABILITY_TEST_PERIOD < now) - when = router->last_reachable + REACHABILITY_TEST_PERIOD; + router->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD < now) + when = router->last_reachable + REACHABILITY_TEST_CYCLE_PERIOD; rep_hist_note_router_unreachable(router->cache_info.identity_digest, when); } diff --git a/src/or/dirserv.h b/src/or/dirserv.h index 949482ba73..569abfca2e 100644 --- a/src/or/dirserv.h +++ b/src/or/dirserv.h @@ -17,8 +17,12 @@ * test? */ #define REACHABILITY_MODULO_PER_TEST 128 +/** How often (in seconds) do we launch reachability tests? */ +#define REACHABILITY_TEST_INTERVAL 10 + /** How many seconds apart are the reachability tests for a given relay? */ -#define REACHABILITY_TEST_PERIOD (10*REACHABILITY_MODULO_PER_TEST) +#define REACHABILITY_TEST_CYCLE_PERIOD \ + (REACHABILITY_TEST_INTERVAL*REACHABILITY_MODULO_PER_TEST) /** Maximum length of an exit policy summary. */ #define MAX_EXITPOLICY_SUMMARY_LEN 1000 diff --git a/src/or/main.c b/src/or/main.c index 558608ec8c..214a4fad5d 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -872,6 +872,7 @@ run_scheduled_events(time_t now) static time_t time_to_check_for_expired_networkstatus = 0; static time_t time_to_write_stats_files = 0; static time_t time_to_write_bridge_stats = 0; + static time_t time_to_launch_reachability_tests = 0; static int should_init_bridge_stats = 1; static time_t time_to_retry_dns_init = 0; or_options_t *options = get_options(); @@ -962,9 +963,10 @@ run_scheduled_events(time_t now) if (accounting_is_enabled(options)) accounting_run_housekeeping(now); - if (now % REACHABILITY_TEST_PERIOD/REACHABILITY_MODULO_PER_TEST == 0 && + if (time_to_launch_reachability_tests < now && (authdir_mode_tests_reachability(options)) && !we_are_hibernating()) { + time_to_launch_reachability_tests = now + REACHABILITY_TEST_INTERVAL; /* try to determine reachability of the other Tor relays */ dirserv_test_reachability(now); } -- cgit v1.2.3-54-g00ecf From 61f648ae3764daacb3865c8344f0381fd09a8e45 Mon Sep 17 00:00:00 2001 From: Roger Dingledine Date: Sun, 13 Mar 2011 15:22:45 -0400 Subject: fix one more typo --- src/or/dirserv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/or/dirserv.c b/src/or/dirserv.c index b68e005531..09da66d202 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -3198,8 +3198,7 @@ dirserv_single_reachability_test(time_t now, routerinfo_t *router) * * The load balancing is such that if we get called once every ten * seconds, we will cycle through all the tests in - * 10*REACHABILITY_MASK_PER_TEST seconds (a - * bit over 20 minutes). + * REACHABILITY_TEST_CYCLE_PERIOD seconds (a bit over 20 minutes). */ void dirserv_test_reachability(time_t now) -- cgit v1.2.3-54-g00ecf