diff options
-rw-r--r-- | changes/ticket40560 | 5 | ||||
-rw-r--r-- | src/feature/nodelist/networkstatus.c | 1 | ||||
-rw-r--r-- | src/feature/relay/onion_queue.c | 2 | ||||
-rw-r--r-- | src/feature/stats/rephist.c | 145 | ||||
-rw-r--r-- | src/feature/stats/rephist.h | 2 | ||||
-rw-r--r-- | src/lib/crypt_ops/crypto_rand.c | 2 | ||||
-rw-r--r-- | src/test/test_stats.c | 60 |
7 files changed, 213 insertions, 4 deletions
diff --git a/changes/ticket40560 b/changes/ticket40560 new file mode 100644 index 0000000000..01dc134ed7 --- /dev/null +++ b/changes/ticket40560 @@ -0,0 +1,5 @@ + o Minor bugfixes (relay, overload): + - Use a fraction and assessment period of ntor handshake drops for the + overload general signal. Before that, a single drop could trigger an + overload state which was far from useful. Fixes bug 40560; bugfix on + 0.4.7.1-alpha. diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c index 41fd312295..666083ae50 100644 --- a/src/feature/nodelist/networkstatus.c +++ b/src/feature/nodelist/networkstatus.c @@ -1666,6 +1666,7 @@ notify_before_networkstatus_changes(const networkstatus_t *old_c, dos_consensus_has_changed(new_c); relay_consensus_has_changed(new_c); hs_dos_consensus_has_changed(new_c); + rep_hist_consensus_has_changed(new_c); } /* Called after a new consensus has been put in the global state. It is safe diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c index b0bb71a084..81a655135d 100644 --- a/src/feature/relay/onion_queue.c +++ b/src/feature/relay/onion_queue.c @@ -191,8 +191,6 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin) rep_hist_note_circuit_handshake_dropped(queue_idx); if (queue_idx == ONION_HANDSHAKE_TYPE_NTOR) { char *m; - /* Note this ntor onionskin drop as an overload */ - rep_hist_note_overload(OVERLOAD_GENERAL); if ((m = rate_limit_log(&last_warned, approx_time()))) { log_warn(LD_GENERAL, "Your computer is too slow to handle this many circuit " diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index 11a75bbb14..52bd94aba9 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -2062,6 +2062,56 @@ STATIC int onion_handshakes_assigned[MAX_ONION_STAT_TYPE+1] = {0}; static uint64_t stats_n_onionskin_assigned[MAX_ONION_STAT_TYPE+1] = {0}; static uint64_t stats_n_onionskin_dropped[MAX_ONION_STAT_TYPE+1] = {0}; +/* We use a scale here so we can represent percentages with decimal points by + * scaling the value by this factor and so 0.5% becomes a value of 500. + * Default is 1% and thus min and max range is 0 to 100%. */ +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE 1000.0 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT 1000 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN 0 +#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX 100000 + +/** Consensus parameter: indicate what fraction of ntor onionskin drop over the + * total number of requests must be reached before we trigger a general + * overload signal.*/ +static double overload_onionskin_ntor_fraction = + OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT / + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0; + +/* Number of seconds for the assessment period. Default is 6 hours (21600) and + * the min max range is within a 32bit value. We align this period to the + * Heartbeat so the logs would match this period more or less. */ +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT (60 * 60 * 6) +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN 0 +#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX INT32_MAX + +/** Consensus parameter: Period, in seconds, over which we count the number of + * ntor onionskins requests and how many were dropped. After that period, we + * assess if we trigger an overload or not. */ +static int32_t overload_onionskin_ntor_period_secs = + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT; + +/** Structure containing information for an assessment period of the onionskin + * drop overload general signal. + * + * It is used to track, within a time period, how many requests we've gotten + * and how many were dropped. The overload general signal is decided from these + * depending on some consensus parameters. */ +typedef struct { + /** Total number of ntor onionskin requested for an assessment period. */ + uint64_t n_ntor_requested; + + /** Total number of dropped ntor onionskins for an assessment period. */ + uint64_t n_ntor_dropped; + + /** When is the next assessment time of the general overload for ntor + * onionskin drop. Once this time is reached, all stats are reset and this + * time is set to the next assessment time. */ + time_t next_assessment_time; +} overload_onionskin_assessment_t; + +/** Keep track of the onionskin requests for an assessment period. */ +static overload_onionskin_assessment_t overload_onionskin_assessment; + /** * We combine ntorv3 and ntor into the same stat, so we must * use this function to covert the cell type to a stat index. @@ -2080,11 +2130,75 @@ onionskin_type_to_stat(uint16_t type) return type; } +/** Assess our ntor handshake statistics and decide if we need to emit a + * general overload signal. + * + * Regardless of overloaded or not, if the assessment time period has passed, + * the stats are reset back to 0 and the assessment time period updated. + * + * This is called when a ntor handshake is _requested_ because we want to avoid + * to have an assymetric situation where requested counter is reset to 0 but + * then a drop happens leading to the drop counter being incremented while the + * requested counter is 0. */ +static void +overload_general_onionskin_assessment(void) +{ + /* Initialize the time. Should be done once. */ + if (overload_onionskin_assessment.next_assessment_time == 0) { + goto reset; + } + + /* Not the time yet. */ + if (overload_onionskin_assessment.next_assessment_time > approx_time()) { + goto done; + } + + /* Make sure we have enough requests to be able to make a proper assessment. + * We want to avoid 1 single request/drop to trigger an overload as we want + * at least the number of requests to be above the scale of our fraction. */ + if (overload_onionskin_assessment.n_ntor_requested < + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE) { + goto done; + } + + /* Lets see if we can signal a general overload. */ + double fraction = (double) overload_onionskin_assessment.n_ntor_dropped / + (double) overload_onionskin_assessment.n_ntor_requested; + if (fraction >= overload_onionskin_ntor_fraction) { + log_notice(LD_HIST, "General overload -> Ntor dropped (%" PRIu64 ") " + "fraction %.4f%% is above threshold of %.4f%%", + overload_onionskin_assessment.n_ntor_dropped, + fraction * 100.0, + overload_onionskin_ntor_fraction * 100.0); + rep_hist_note_overload(OVERLOAD_GENERAL); + } + + reset: + /* Reset counters for the next period. */ + overload_onionskin_assessment.n_ntor_dropped = 0; + overload_onionskin_assessment.n_ntor_requested = 0; + overload_onionskin_assessment.next_assessment_time = + approx_time() + overload_onionskin_ntor_period_secs; + + done: + return; +} + /** A new onionskin (using the <b>type</b> handshake) has arrived. */ void rep_hist_note_circuit_handshake_requested(uint16_t type) { - onion_handshakes_requested[onionskin_type_to_stat(type)]++; + uint16_t stat = onionskin_type_to_stat(type); + + onion_handshakes_requested[stat]++; + + /* Only relays get to record requested onionskins. */ + if (stat == ONION_HANDSHAKE_TYPE_NTOR) { + /* Assess if we've reached the overload general signal. */ + overload_general_onionskin_assessment(); + + overload_onionskin_assessment.n_ntor_requested++; + } } /** We've sent an onionskin (using the <b>type</b> handshake) to a @@ -2101,7 +2215,15 @@ rep_hist_note_circuit_handshake_assigned(uint16_t type) void rep_hist_note_circuit_handshake_dropped(uint16_t type) { - stats_n_onionskin_dropped[onionskin_type_to_stat(type)]++; + uint16_t stat = onionskin_type_to_stat(type); + + stats_n_onionskin_dropped[stat]++; + + /* Only relays get to record requested onionskins. */ + if (stat == ONION_HANDSHAKE_TYPE_NTOR) { + /* Note the dropped ntor in the overload assessment object. */ + overload_onionskin_assessment.n_ntor_dropped++; + } } /** Get the circuit handshake value that is requested. */ @@ -2704,6 +2826,25 @@ rep_hist_free_all(void) tor_assert_nonfatal_once(rephist_total_num == 0); } +/** Called just before the consensus will be replaced. Update the consensus + * parameters in case they changed. */ +void +rep_hist_consensus_has_changed(const networkstatus_t *ns) +{ + overload_onionskin_ntor_fraction = + networkstatus_get_param(ns, "overload_onionskin_ntor_scale_percent", + OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT, + OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN, + OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX) / + OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0; + + overload_onionskin_ntor_period_secs = + networkstatus_get_param(ns, "overload_onionskin_ntor_period_secs", + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT, + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN, + OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX); +} + #ifdef TOR_UNIT_TESTS /* only exists for unit tests: get HSv2 stats object */ const hs_v2_stats_t * diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h index 2fb24a10a7..e8f43fbb1d 100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@ -89,6 +89,8 @@ uint64_t rep_hist_get_n_dns_request(int type); void rep_hist_note_dns_request(int type); void rep_hist_note_dns_error(int type, uint8_t error); +void rep_hist_consensus_has_changed(const networkstatus_t *ns); + /** We combine ntor and ntorv3 stats, so we have 3 stat types: * tap, fast, and ntor. The max type is ntor (2) */ #define MAX_ONION_STAT_TYPE ONION_HANDSHAKE_TYPE_NTOR diff --git a/src/lib/crypt_ops/crypto_rand.c b/src/lib/crypt_ops/crypto_rand.c index 5bf3a65b3b..f39ee6c24f 100644 --- a/src/lib/crypt_ops/crypto_rand.c +++ b/src/lib/crypt_ops/crypto_rand.c @@ -568,6 +568,8 @@ crypto_random_hostname(int min_rand_len, int max_rand_len, const char *prefix, prefixlen = strlen(prefix); resultlen = prefixlen + strlen(suffix) + randlen + 16; + /* (x+(n-1))/n is an idiom for dividing x by n, rounding up to the nearest + * integer and thus why this construction. */ rand_bytes_len = ((randlen*5)+7)/8; if (rand_bytes_len % 5) rand_bytes_len += 5 - (rand_bytes_len%5); diff --git a/src/test/test_stats.c b/src/test/test_stats.c index c7e0c10845..22d65b1e54 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -867,6 +867,65 @@ test_overload_stats(void *arg) tor_free(stats_str); } +/** Test the overload stats logic. */ +static void +test_overload_onionskin_ntor(void *arg) +{ + char *stats_str = NULL; + (void) arg; + uint16_t type = ONION_HANDSHAKE_TYPE_NTOR_V3; + + /* Lets simulate a series of timeouts but below our default 1% threshold. */ + + for (int i = 0; i < 1000; i++) { + rep_hist_note_circuit_handshake_requested(type); + /* This should trigger 9 drop which is just below 1% (10) */ + if (i > 0 && !(i % 100)) { + rep_hist_note_circuit_handshake_dropped(type); + } + } + + /* No overload yet. */ + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* Move it 6 hours in the future and see if we get a general overload. */ + update_approx_time(approx_time() + 21600); + + /* This request should NOT trigger the general overload because we are below + * our default of 1%. */ + rep_hist_note_circuit_handshake_requested(type); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + /* We'll now go above our 1% threshold. */ + for (int i = 0; i < 1000; i++) { + rep_hist_note_circuit_handshake_requested(type); + /* This should trigger 10 timeouts which is our threshold of 1% (10) */ + if (!(i % 10)) { + rep_hist_note_circuit_handshake_dropped(type); + } + } + + /* Move it 6 hours in the future and see if we get a general overload. */ + update_approx_time(approx_time() + 21600); + + /* This request should trigger the general overload because above 1%. */ + rep_hist_note_circuit_handshake_requested(type); + stats_str = rep_hist_get_overload_general_line(); + tt_assert(stats_str); + tor_free(stats_str); + + /* Move 72h in the future, we should NOT get an overload anymore. */ + update_approx_time(approx_time() + (72 * 3600)); + + stats_str = rep_hist_get_overload_general_line(); + tt_assert(!stats_str); + + done: + tor_free(stats_str); +} + #define ENT(name) \ { #name, test_ ## name , 0, NULL, NULL } #define FORK(name) \ @@ -883,6 +942,7 @@ struct testcase_t stats_tests[] = { FORK(rephist_v3_onions), FORK(load_stats_file), FORK(overload_stats), + FORK(overload_onionskin_ntor), END_OF_TESTCASES }; |