diff options
Diffstat (limited to 'src/or/circuitstats.c')
-rw-r--r-- | src/or/circuitstats.c | 316 |
1 files changed, 271 insertions, 45 deletions
diff --git a/src/or/circuitstats.c b/src/or/circuitstats.c index 418acc0024..f06c2e5e38 100644 --- a/src/or/circuitstats.c +++ b/src/or/circuitstats.c @@ -1,7 +1,7 @@ /* Copyright (c) 2001 Matej Pfajfar. * Copyright (c) 2001-2004, Roger Dingledine. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2016, The Tor Project, Inc. */ + * Copyright (c) 2007-2017, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -36,6 +36,8 @@ #include "rendclient.h" #include "rendservice.h" #include "statefile.h" +#include "circuitlist.h" +#include "circuituse.h" #undef log #include <math.h> @@ -43,6 +45,7 @@ static void cbt_control_event_buildtimeout_set( const circuit_build_times_t *cbt, buildtimeout_set_event_t type); +static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt); #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2)) @@ -60,7 +63,7 @@ static circuit_build_times_t circ_times; static int unit_tests = 0; #else #define unit_tests 0 -#endif +#endif /* defined(TOR_UNIT_TESTS) */ /** Return a pointer to the data structure describing our current circuit * build time history and computations. */ @@ -105,13 +108,21 @@ get_circuit_build_timeout_ms(void) * 6. If we are configured in Single Onion mode */ int -circuit_build_times_disabled(void) +circuit_build_times_disabled(const or_options_t *options) +{ + return circuit_build_times_disabled_(options, 0); +} + +/** As circuit_build_times_disabled, but take options as an argument. */ +int +circuit_build_times_disabled_(const or_options_t *options, + int ignore_consensus) { if (unit_tests) { return 0; } else { - const or_options_t *options = get_options(); - int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled", + int consensus_disabled = + ignore_consensus ? 0 : networkstatus_get_param(NULL, "cbtdisabled", 0, 0, 1); int config_disabled = !options->LearnCircuitBuildTimeout; int dirauth_disabled = options->AuthoritativeDir; @@ -140,7 +151,7 @@ circuit_build_times_disabled(void) "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", consensus_disabled, config_disabled, dirauth_disabled, state_disabled); -#endif +#endif /* 0 */ return 1; } else { #if 0 @@ -149,14 +160,14 @@ circuit_build_times_disabled(void) "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", consensus_disabled, config_disabled, dirauth_disabled, state_disabled); -#endif +#endif /* 0 */ return 0; } } } /** - * Retrieve and bounds-check the cbtmaxtimeouts consensus paramter. + * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter. * * Effect: When this many timeouts happen in the last 'cbtrecentcount' * circuit attempts, the client should discard all of its history and @@ -183,7 +194,7 @@ circuit_build_times_max_timeouts(void) } /** - * Retrieve and bounds-check the cbtnummodes consensus paramter. + * Retrieve and bounds-check the cbtnummodes consensus parameter. * * Effect: This value governs how many modes to use in the weighted * average calculation of Pareto parameter Xm. A value of 3 introduces @@ -210,7 +221,7 @@ circuit_build_times_default_num_xm_modes(void) } /** - * Retrieve and bounds-check the cbtmincircs consensus paramter. + * Retrieve and bounds-check the cbtmincircs consensus parameter. * * Effect: This is the minimum number of circuits to build before * computing a timeout. @@ -242,7 +253,7 @@ circuit_build_times_enough_to_compute(const circuit_build_times_t *cbt) } /** - * Retrieve and bounds-check the cbtquantile consensus paramter. + * Retrieve and bounds-check the cbtquantile consensus parameter. * * Effect: This is the position on the quantile curve to use to set the * timeout value. It is a percent (10-99). @@ -266,7 +277,7 @@ circuit_build_times_quantile_cutoff(void) } /** - * Retrieve and bounds-check the cbtclosequantile consensus paramter. + * Retrieve and bounds-check the cbtclosequantile consensus parameter. * * Effect: This is the position on the quantile curve to use to set the * timeout value to use to actually close circuits. It is a percent @@ -298,7 +309,7 @@ circuit_build_times_close_quantile(void) } /** - * Retrieve and bounds-check the cbttestfreq consensus paramter. + * Retrieve and bounds-check the cbttestfreq consensus parameter. * * Effect: Describes how often in seconds to build a test circuit to * gather timeout values. Only applies if less than 'cbtmincircs' @@ -345,7 +356,7 @@ circuit_build_times_min_timeout(void) } /** - * Retrieve and bounds-check the cbtinitialtimeout consensus paramter. + * Retrieve and bounds-check the cbtinitialtimeout consensus parameter. * * Effect: This is the timeout value to use before computing a timeout, * in milliseconds. @@ -375,7 +386,7 @@ circuit_build_times_initial_timeout(void) } /** - * Retrieve and bounds-check the cbtrecentcount consensus paramter. + * Retrieve and bounds-check the cbtrecentcount consensus parameter. * * Effect: This is the number of circuit build times to keep track of * for deciding if we hit cbtmaxtimeouts and need to reset our state @@ -417,15 +428,20 @@ circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, * update if we aren't. */ - if (!circuit_build_times_disabled()) { + if (!circuit_build_times_disabled(get_options())) { num = circuit_build_times_recent_circuit_count(ns); if (num > 0) { if (num != cbt->liveness.num_recent_circs) { int8_t *recent_circs; - log_notice(LD_CIRC, "The Tor Directory Consensus has changed how many " - "circuits we must track to detect network failures from %d " - "to %d.", cbt->liveness.num_recent_circs, num); + if (cbt->liveness.num_recent_circs > 0) { + log_notice(LD_CIRC, "The Tor Directory Consensus has changed how " + "many circuits we must track to detect network failures " + "from %d to %d.", cbt->liveness.num_recent_circs, num); + } else { + log_notice(LD_CIRC, "Upon receiving a consensus directory, " + "re-enabling circuit-based network failure detection."); + } tor_assert(cbt->liveness.timeouts_after_firsthop || cbt->liveness.num_recent_circs == 0); @@ -493,14 +509,15 @@ static double circuit_build_times_get_initial_timeout(void) { double timeout; + const or_options_t *options = get_options(); /* * Check if we have LearnCircuitBuildTimeout, and if we don't, * always use CircuitBuildTimeout, no questions asked. */ - if (!unit_tests && get_options()->CircuitBuildTimeout) { - timeout = get_options()->CircuitBuildTimeout*1000; - if (!circuit_build_times_disabled() && + if (!unit_tests && options->CircuitBuildTimeout) { + timeout = options->CircuitBuildTimeout*1000; + if (!circuit_build_times_disabled(options) && timeout < circuit_build_times_min_timeout()) { log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds", circuit_build_times_min_timeout()/1000); @@ -526,6 +543,11 @@ circuit_build_times_reset(circuit_build_times_t *cbt) cbt->total_build_times = 0; cbt->build_times_idx = 0; cbt->have_computed_timeout = 0; + + // Reset timeout and close counts + cbt->num_circ_succeeded = 0; + cbt->num_circ_closed = 0; + cbt->num_circ_timeouts = 0; } /** @@ -542,7 +564,7 @@ circuit_build_times_init(circuit_build_times_t *cbt) * Check if we really are using adaptive timeouts, and don't keep * track of this stuff if not. */ - if (!circuit_build_times_disabled()) { + if (!circuit_build_times_disabled(get_options())) { cbt->liveness.num_recent_circs = circuit_build_times_recent_circuit_count(NULL); cbt->liveness.timeouts_after_firsthop = @@ -599,7 +621,124 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) "Rewound history by %d places. Current index: %d. " "Total: %d", n, cbt->build_times_idx, cbt->total_build_times); } -#endif +#endif /* 0 */ + +/** + * Mark this circuit as timed out, but change its purpose + * so that it continues to build, allowing us to measure + * its full build time. + */ +void +circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ) +{ + control_event_circuit_status(circ, + CIRC_EVENT_FAILED, + END_CIRC_REASON_TIMEOUT); + circuit_change_purpose(TO_CIRCUIT(circ), + CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT); + /* Record this event to check for too many timeouts + * in a row. This function does not record a time value yet + * (we do that later); it only counts the fact that we did + * have a timeout. We also want to avoid double-counting + * already "relaxed" circuits, which are counted in + * circuit_expire_building(). */ + if (!circ->relaxed_timeout) { + int first_hop_succeeded = circ->cpath && + circ->cpath->state == CPATH_STATE_OPEN; + + circuit_build_times_count_timeout( + get_circuit_build_times_mutable(), + first_hop_succeeded); + } +} + +/** + * Perform the build time work that needs to be done when a circuit + * completes a hop. + * + * This function decides if we should record a circuit's build time + * in our histogram data and other statistics, and if so, records it. + * It also will mark circuits that have already timed out as + * measurement-only circuits, so they can continue to build but + * not get used. + * + * For this, we want to consider circuits that will eventually make + * it to the third hop. For circuits longer than 3 hops, we want to + * record their build time when they reach the third hop, but let + * them continue (and not count them later). For circuits that are + * exactly 3 hops, this will count them when they are completed. We + * do this so that CBT is always gathering statistics on circuits + * of the same length, regardless of their type. + */ +void +circuit_build_times_handle_completed_hop(origin_circuit_t *circ) +{ + struct timeval end; + long timediff; + + /* If circuit build times are disabled, let circuit_expire_building() + * handle it.. */ + if (circuit_build_times_disabled(get_options())) { + return; + } + + /* Is this a circuit for which the timeout applies in a straight-forward + * way? If so, handle it below. If not, just return (and let + * circuit_expire_building() eventually take care of it). + */ + if (!circuit_timeout_want_to_count_circ(circ)) { + return; + } + + tor_gettimeofday(&end); + timediff = tv_mdiff(&circ->base_.timestamp_began, &end); + + /* Check if we would have timed out already. If so, change the + * purpose here. But don't do any timeout handling here if there + * are no circuits opened yet. Save it for circuit_expire_building() + * (to allow it to handle timeout "relaxing" over there). */ + if (timediff > get_circuit_build_timeout_ms() && + circuit_any_opened_circuits_cached()) { + + /* Circuits are allowed to last longer for measurement. + * Switch their purpose and wait. */ + if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + log_info(LD_CIRC, + "Deciding to timeout circuit "U64_FORMAT"\n", + U64_PRINTF_ARG(circ->global_identifier)); + circuit_build_times_mark_circ_as_measurement_only(circ); + } + } + + /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN, + * add it to our buildtimes. */ + if (circuit_get_cpath_opened_len(circ) == DEFAULT_ROUTE_LEN) { + /* If the circuit build time is much greater than we would have cut + * it off at, we probably had a suspend event along this codepath, + * and we should discard the value. + */ + if (timediff < 0 || + timediff > 2*get_circuit_build_close_time_ms()+1000) { + log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. " + "Assuming clock jump. Purpose %d (%s)", timediff, + circ->base_.purpose, + circuit_purpose_to_string(circ->base_.purpose)); + } else { + /* Only count circuit times if the network is live */ + if (circuit_build_times_network_check_live( + get_circuit_build_times())) { + circuit_build_times_add_time(get_circuit_build_times_mutable(), + (build_time_t)timediff); + circuit_build_times_set_timeout(get_circuit_build_times_mutable()); + } + + if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + circuit_build_times_network_circ_success( + get_circuit_build_times_mutable()); + } + } + } +} /** * Add a new build time value <b>time</b> to the set of build times. Time @@ -667,7 +806,7 @@ circuit_build_times_min(circuit_build_times_t *cbt) } return min_build_time; } -#endif +#endif /* 0 */ /** * Calculate and return a histogram for the set of build times. @@ -753,11 +892,23 @@ circuit_build_times_get_xm(circuit_build_times_t *cbt) histogram[nth_max_bin[n]]); } - /* The following assert is safe, because we don't get called when we - * haven't observed at least CBT_MIN_MIN_CIRCUITS_TO_OBSERVE circuits. */ + /* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE + * circuits were abandoned before they completed. This shouldn't happen, + * though. We should have reset/re-learned a lower timeout first. */ + if (bin_counts == 0) { + ret = 0; + log_warn(LD_CIRC, + "No valid circuit build time data out of %d times, %u modes, " + "have_timeout=%d, %lfms", cbt->total_build_times, num_modes, + cbt->have_computed_timeout, cbt->timeout_ms); + goto done; + } + tor_assert(bin_counts > 0); ret /= bin_counts; + + done: tor_free(histogram); tor_free(nth_max_bin); @@ -901,12 +1052,12 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0; uint32_t loaded_cnt = 0, N = 0; config_line_t *line; - unsigned int i; + int i; build_time_t *loaded_times; int err = 0; circuit_build_times_init(cbt); - if (circuit_build_times_disabled()) { + if (circuit_build_times_disabled(get_options())) { return 0; } @@ -930,7 +1081,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, uint32_t count, k; build_time_t ms; int ok; - ms = (build_time_t)tor_parse_ulong(ms_str, 0, 0, + ms = (build_time_t)tor_parse_ulong(ms_str, 10, 0, CBT_BUILD_TIME_MAX, &ok, NULL); if (!ok) { log_warn(LD_GENERAL, "Unable to parse circuit build times: " @@ -940,7 +1091,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, smartlist_free(args); break; } - count = (uint32_t)tor_parse_ulong(count_str, 0, 0, + count = (uint32_t)tor_parse_ulong(count_str, 10, 0, UINT32_MAX, &ok, NULL); if (!ok) { log_warn(LD_GENERAL, "Unable to parse circuit build times: " @@ -951,8 +1102,8 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, break; } - if (loaded_cnt+count+state->CircuitBuildAbandonedCount - > state->TotalBuildTimes) { + if (loaded_cnt+count+ (unsigned)state->CircuitBuildAbandonedCount + > (unsigned) state->TotalBuildTimes) { log_warn(LD_CIRC, "Too many build times in state file. " "Stopping short before %d", @@ -977,7 +1128,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED; } - if (loaded_cnt != state->TotalBuildTimes) { + if (loaded_cnt != (unsigned)state->TotalBuildTimes) { log_warn(LD_CIRC, "Corrupt state file? Build times count mismatch. " "Read %d times, but file says %d", loaded_cnt, @@ -1043,6 +1194,10 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) * and less frechet-like. */ cbt->Xm = circuit_build_times_get_xm(cbt); + /* If Xm came back 0, then too many circuits were abandoned. */ + if (cbt->Xm == 0) + return 0; + tor_assert(cbt->Xm > 0); for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { @@ -1156,7 +1311,7 @@ circuit_build_times_cdf(circuit_build_times_t *cbt, double x) tor_assert(0 <= ret && ret <= 1.0); return ret; } -#endif +#endif /* defined(TOR_UNIT_TESTS) */ #ifdef TOR_UNIT_TESTS /** @@ -1191,7 +1346,7 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, tor_assert(ret > 0); return ret; } -#endif +#endif /* defined(TOR_UNIT_TESTS) */ #ifdef TOR_UNIT_TESTS /** @@ -1214,7 +1369,7 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, (tor_mathlog(cbt->Xm)-tor_mathlog(timeout_ms)); tor_assert(cbt->alpha > 0); } -#endif +#endif /* defined(TOR_UNIT_TESTS) */ /** * Returns true if we need circuits to be built @@ -1276,9 +1431,32 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt) } /** - * Called to indicate that we completed a circuit. Because this circuit + * Non-destructively scale all of our circuit success, timeout, and close + * counts down by a factor of two. Scaling in this way preserves the + * ratios between succeeded vs timed out vs closed circuits, so that + * our statistics don't change when we scale. + * + * This is used only in the rare event that we build more than + * INT32_MAX circuits. Since the num_circ_* variables are + * uint32_t, we won't even be close to overflowing them. + */ +void +circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt) +{ + cbt->num_circ_succeeded /= 2; + cbt->num_circ_timeouts /= 2; + cbt->num_circ_closed /= 2; +} + +/** + * Called to indicate that we "completed" a circuit. Because this circuit * succeeded, it doesn't count as a timeout-after-the-first-hop. * + * (For the purposes of the cbt code, we consider a circuit "completed" if + * it has 3 hops, regardless of its final hop count. We do this because + * we're trying to answer the question, "how long should a circuit take to + * reach the 3-hop count".) + * * This is used by circuit_build_times_network_check_changed() to determine * if we had too many recent timeouts and need to reset our learned timeout * to something higher. @@ -1286,6 +1464,14 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt) void circuit_build_times_network_circ_success(circuit_build_times_t *cbt) { + // Count circuit success + cbt->num_circ_succeeded++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_succeeded >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* Check for NULLness because we might not be using adaptive timeouts */ if (cbt->liveness.timeouts_after_firsthop && cbt->liveness.num_recent_circs > 0) { @@ -1308,6 +1494,14 @@ static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, int did_onehop) { + // Count circuit timeout + cbt->num_circ_timeouts++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_timeouts >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* Check for NULLness because we might not be using adaptive timeouts */ if (cbt->liveness.timeouts_after_firsthop && cbt->liveness.num_recent_circs > 0) { @@ -1333,6 +1527,15 @@ circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { time_t now = time(NULL); + + // Count circuit close + cbt->num_circ_closed++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_closed >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* * Check if this is a timeout that was for a circuit that spent its * entire existence during a time where we have had no network activity. @@ -1431,7 +1634,7 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) #define MAX_TIMEOUT ((int32_t) (INT32_MAX/2)) /* Check to see if this has happened before. If so, double the timeout - * to give people on abysmally bad network connections a shot at access */ + * to give clients on abysmally bad network connections a shot at access */ if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) { if (cbt->timeout_ms > MAX_TIMEOUT || cbt->close_ms > MAX_TIMEOUT) { log_warn(LD_CIRC, "Insanely large circuit build timeout value. " @@ -1507,7 +1710,7 @@ circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { - if (circuit_build_times_disabled()) { + if (circuit_build_times_disabled(get_options())) { cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout(); return 0; @@ -1538,7 +1741,7 @@ void circuit_build_times_count_timeout(circuit_build_times_t *cbt, int did_onehop) { - if (circuit_build_times_disabled()) { + if (circuit_build_times_disabled(get_options())) { cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout(); return; @@ -1612,7 +1815,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) /* * Just return if we aren't using adaptive timeouts */ - if (circuit_build_times_disabled()) + if (circuit_build_times_disabled(get_options())) return; if (!circuit_build_times_set_timeout_worker(cbt)) @@ -1673,7 +1876,7 @@ circuitbuild_running_unit_tests(void) { unit_tests = 1; } -#endif +#endif /* defined(TOR_UNIT_TESTS) */ void circuit_build_times_update_last_circ(circuit_build_times_t *cbt) @@ -1687,6 +1890,8 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt, { char *args = NULL; double qnt; + double timeout_rate = 0.0; + double close_rate = 0.0; switch (type) { case BUILDTIMEOUT_SET_EVENT_RESET: @@ -1701,15 +1906,36 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt, break; } + /* The timeout rate is the ratio of the timeout count over + * the total number of circuits attempted. The total number of + * circuits is (timeouts+succeeded), since every circuit + * either succeeds, or times out. "Closed" circuits are + * MEASURE_TIMEOUT circuits whose measurement period expired. + * All MEASURE_TIMEOUT circuits are counted in the timeouts stat + * before transitioning to MEASURE_TIMEOUT (in + * circuit_build_times_mark_circ_as_measurement_only()). + * MEASURE_TIMEOUT circuits that succeed are *not* counted as + * "succeeded". See circuit_build_times_handle_completed_hop(). + * + * We cast the denominator + * to promote it to double before the addition, to avoid int32 + * overflow. */ + const double total_circuits = + ((double)cbt->num_circ_timeouts) + cbt->num_circ_succeeded; + if (total_circuits >= 1.0) { + timeout_rate = cbt->num_circ_timeouts / total_circuits; + close_rate = cbt->num_circ_closed / total_circuits; + } + tor_asprintf(&args, "TOTAL_TIMES=%lu " "TIMEOUT_MS=%lu XM=%lu ALPHA=%f CUTOFF_QUANTILE=%f " "TIMEOUT_RATE=%f CLOSE_MS=%lu CLOSE_RATE=%f", (unsigned long)cbt->total_build_times, (unsigned long)cbt->timeout_ms, (unsigned long)cbt->Xm, cbt->alpha, qnt, - circuit_build_times_timeout_rate(cbt), + timeout_rate, (unsigned long)cbt->close_ms, - circuit_build_times_close_rate(cbt)); + close_rate); control_event_buildtimeout_set(type, args); |