diff options
Diffstat (limited to 'src/or/circuitstats.c')
-rw-r--r-- | src/or/circuitstats.c | 231 |
1 files changed, 218 insertions, 13 deletions
diff --git a/src/or/circuitstats.c b/src/or/circuitstats.c index b8421a3c7e..f87bbd822b 100644 --- a/src/or/circuitstats.c +++ b/src/or/circuitstats.c @@ -36,6 +36,8 @@ #include "rendclient.h" #include "rendservice.h" #include "statefile.h" +#include "circuitlist.h" +#include "circuituse.h" #undef log #include <math.h> @@ -43,6 +45,7 @@ static void cbt_control_event_buildtimeout_set( const circuit_build_times_t *cbt, buildtimeout_set_event_t type); +static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt); #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2)) @@ -164,7 +167,7 @@ circuit_build_times_disabled_(const or_options_t *options, } /** - * Retrieve and bounds-check the cbtmaxtimeouts consensus paramter. + * Retrieve and bounds-check the cbtmaxtimeouts consensus parameter. * * Effect: When this many timeouts happen in the last 'cbtrecentcount' * circuit attempts, the client should discard all of its history and @@ -191,7 +194,7 @@ circuit_build_times_max_timeouts(void) } /** - * Retrieve and bounds-check the cbtnummodes consensus paramter. + * Retrieve and bounds-check the cbtnummodes consensus parameter. * * Effect: This value governs how many modes to use in the weighted * average calculation of Pareto parameter Xm. A value of 3 introduces @@ -218,7 +221,7 @@ circuit_build_times_default_num_xm_modes(void) } /** - * Retrieve and bounds-check the cbtmincircs consensus paramter. + * Retrieve and bounds-check the cbtmincircs consensus parameter. * * Effect: This is the minimum number of circuits to build before * computing a timeout. @@ -250,7 +253,7 @@ circuit_build_times_enough_to_compute(const circuit_build_times_t *cbt) } /** - * Retrieve and bounds-check the cbtquantile consensus paramter. + * Retrieve and bounds-check the cbtquantile consensus parameter. * * Effect: This is the position on the quantile curve to use to set the * timeout value. It is a percent (10-99). @@ -274,7 +277,7 @@ circuit_build_times_quantile_cutoff(void) } /** - * Retrieve and bounds-check the cbtclosequantile consensus paramter. + * Retrieve and bounds-check the cbtclosequantile consensus parameter. * * Effect: This is the position on the quantile curve to use to set the * timeout value to use to actually close circuits. It is a percent @@ -306,7 +309,7 @@ circuit_build_times_close_quantile(void) } /** - * Retrieve and bounds-check the cbttestfreq consensus paramter. + * Retrieve and bounds-check the cbttestfreq consensus parameter. * * Effect: Describes how often in seconds to build a test circuit to * gather timeout values. Only applies if less than 'cbtmincircs' @@ -353,7 +356,7 @@ circuit_build_times_min_timeout(void) } /** - * Retrieve and bounds-check the cbtinitialtimeout consensus paramter. + * Retrieve and bounds-check the cbtinitialtimeout consensus parameter. * * Effect: This is the timeout value to use before computing a timeout, * in milliseconds. @@ -383,7 +386,7 @@ circuit_build_times_initial_timeout(void) } /** - * Retrieve and bounds-check the cbtrecentcount consensus paramter. + * Retrieve and bounds-check the cbtrecentcount consensus parameter. * * Effect: This is the number of circuit build times to keep track of * for deciding if we hit cbtmaxtimeouts and need to reset our state @@ -540,6 +543,11 @@ circuit_build_times_reset(circuit_build_times_t *cbt) cbt->total_build_times = 0; cbt->build_times_idx = 0; cbt->have_computed_timeout = 0; + + // Reset timeout and close counts + cbt->num_circ_succeeded = 0; + cbt->num_circ_closed = 0; + cbt->num_circ_timeouts = 0; } /** @@ -616,6 +624,123 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) #endif /* 0 */ /** + * Mark this circuit as timed out, but change its purpose + * so that it continues to build, allowing us to measure + * its full build time. + */ +void +circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ) +{ + control_event_circuit_status(circ, + CIRC_EVENT_FAILED, + END_CIRC_REASON_TIMEOUT); + circuit_change_purpose(TO_CIRCUIT(circ), + CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT); + /* Record this event to check for too many timeouts + * in a row. This function does not record a time value yet + * (we do that later); it only counts the fact that we did + * have a timeout. We also want to avoid double-counting + * already "relaxed" circuits, which are counted in + * circuit_expire_building(). */ + if (!circ->relaxed_timeout) { + int first_hop_succeeded = circ->cpath && + circ->cpath->state == CPATH_STATE_OPEN; + + circuit_build_times_count_timeout( + get_circuit_build_times_mutable(), + first_hop_succeeded); + } +} + +/** + * Perform the build time work that needs to be done when a circuit + * completes a hop. + * + * This function decides if we should record a circuit's build time + * in our histogram data and other statistics, and if so, records it. + * It also will mark circuits that have already timed out as + * measurement-only circuits, so they can continue to build but + * not get used. + * + * For this, we want to consider circuits that will eventually make + * it to the third hop. For circuits longer than 3 hops, we want to + * record their build time when they reach the third hop, but let + * them continue (and not count them later). For circuits that are + * exactly 3 hops, this will count them when they are completed. We + * do this so that CBT is always gathering statistics on circuits + * of the same length, regardless of their type. + */ +void +circuit_build_times_handle_completed_hop(origin_circuit_t *circ) +{ + struct timeval end; + long timediff; + + /* If circuit build times are disabled, let circuit_expire_building() + * handle it.. */ + if (circuit_build_times_disabled(get_options())) { + return; + } + + /* Is this a circuit for which the timeout applies in a straight-forward + * way? If so, handle it below. If not, just return (and let + * circuit_expire_building() eventually take care of it). + */ + if (!circuit_timeout_want_to_count_circ(circ)) { + return; + } + + tor_gettimeofday(&end); + timediff = tv_mdiff(&circ->base_.timestamp_began, &end); + + /* Check if we would have timed out already. If so, change the + * purpose here. But don't do any timeout handling here if there + * are no circuits opened yet. Save it for circuit_expire_building() + * (to allow it to handle timeout "relaxing" over there). */ + if (timediff > get_circuit_build_timeout_ms() && + circuit_any_opened_circuits_cached()) { + + /* Circuits are allowed to last longer for measurement. + * Switch their purpose and wait. */ + if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + log_info(LD_CIRC, + "Deciding to timeout circuit "U64_FORMAT"\n", + U64_PRINTF_ARG(circ->global_identifier)); + circuit_build_times_mark_circ_as_measurement_only(circ); + } + } + + /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN, + * add it to our buildtimes. */ + if (circuit_get_cpath_opened_len(circ) == DEFAULT_ROUTE_LEN) { + /* If the circuit build time is much greater than we would have cut + * it off at, we probably had a suspend event along this codepath, + * and we should discard the value. + */ + if (timediff < 0 || + timediff > 2*get_circuit_build_close_time_ms()+1000) { + log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. " + "Assuming clock jump. Purpose %d (%s)", timediff, + circ->base_.purpose, + circuit_purpose_to_string(circ->base_.purpose)); + } else { + /* Only count circuit times if the network is live */ + if (circuit_build_times_network_check_live( + get_circuit_build_times())) { + circuit_build_times_add_time(get_circuit_build_times_mutable(), + (build_time_t)timediff); + circuit_build_times_set_timeout(get_circuit_build_times_mutable()); + } + + if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + circuit_build_times_network_circ_success( + get_circuit_build_times_mutable()); + } + } + } +} + +/** * Add a new build time value <b>time</b> to the set of build times. Time * units are milliseconds. * @@ -767,11 +892,23 @@ circuit_build_times_get_xm(circuit_build_times_t *cbt) histogram[nth_max_bin[n]]); } - /* The following assert is safe, because we don't get called when we - * haven't observed at least CBT_MIN_MIN_CIRCUITS_TO_OBSERVE circuits. */ + /* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE + * circuits were abandoned before they completed. This shouldn't happen, + * though. We should have reset/re-learned a lower timeout first. */ + if (bin_counts == 0) { + ret = 0; + log_warn(LD_CIRC, + "No valid circuit build time data out of %d times, %u modes, " + "have_timeout=%d, %lfms", cbt->total_build_times, num_modes, + cbt->have_computed_timeout, cbt->timeout_ms); + goto done; + } + tor_assert(bin_counts > 0); ret /= bin_counts; + + done: tor_free(histogram); tor_free(nth_max_bin); @@ -1057,6 +1194,10 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) * and less frechet-like. */ cbt->Xm = circuit_build_times_get_xm(cbt); + /* If Xm came back 0, then too many circuits were abandoned. */ + if (cbt->Xm == 0) + return 0; + tor_assert(cbt->Xm > 0); for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { @@ -1290,9 +1431,32 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt) } /** - * Called to indicate that we completed a circuit. Because this circuit + * Non-destructively scale all of our circuit success, timeout, and close + * counts down by a factor of two. Scaling in this way preserves the + * ratios between succeeded vs timed out vs closed circuits, so that + * our statistics don't change when we scale. + * + * This is used only in the rare event that we build more than + * INT32_MAX circuits. Since the num_circ_* variables are + * uint32_t, we won't even be close to overflowing them. + */ +void +circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt) +{ + cbt->num_circ_succeeded /= 2; + cbt->num_circ_timeouts /= 2; + cbt->num_circ_closed /= 2; +} + +/** + * Called to indicate that we "completed" a circuit. Because this circuit * succeeded, it doesn't count as a timeout-after-the-first-hop. * + * (For the purposes of the cbt code, we consider a circuit "completed" if + * it has 3 hops, regardless of its final hop count. We do this because + * we're trying to answer the question, "how long should a circuit take to + * reach the 3-hop count".) + * * This is used by circuit_build_times_network_check_changed() to determine * if we had too many recent timeouts and need to reset our learned timeout * to something higher. @@ -1300,6 +1464,14 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt) void circuit_build_times_network_circ_success(circuit_build_times_t *cbt) { + // Count circuit success + cbt->num_circ_succeeded++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_succeeded >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* Check for NULLness because we might not be using adaptive timeouts */ if (cbt->liveness.timeouts_after_firsthop && cbt->liveness.num_recent_circs > 0) { @@ -1322,6 +1494,14 @@ static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, int did_onehop) { + // Count circuit timeout + cbt->num_circ_timeouts++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_timeouts >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* Check for NULLness because we might not be using adaptive timeouts */ if (cbt->liveness.timeouts_after_firsthop && cbt->liveness.num_recent_circs > 0) { @@ -1347,6 +1527,15 @@ circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { time_t now = time(NULL); + + // Count circuit close + cbt->num_circ_closed++; + + // If we're going to wrap int32, scale everything + if (cbt->num_circ_closed >= INT32_MAX) { + circuit_build_times_scale_circ_counts(cbt); + } + /* * Check if this is a timeout that was for a circuit that spent its * entire existence during a time where we have had no network activity. @@ -1701,6 +1890,8 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt, { char *args = NULL; double qnt; + double timeout_rate = 0.0; + double close_rate = 0.0; switch (type) { case BUILDTIMEOUT_SET_EVENT_RESET: @@ -1715,15 +1906,29 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt, break; } + /* The timeout rate is the ratio of the timeout count over + * the total number of circuits attempted. The total number of + * circuits is (timeouts+succeeded+closed), since a circuit can + * either timeout, close, or succeed. We cast the denominator + * to promote it to double before the addition, to avoid int32 + * overflow. */ + const double total_circuits = + ((double)cbt->num_circ_timeouts) + cbt->num_circ_succeeded + + cbt->num_circ_closed; + if (total_circuits >= 1.0) { + timeout_rate = cbt->num_circ_timeouts / total_circuits; + close_rate = cbt->num_circ_closed / total_circuits; + } + tor_asprintf(&args, "TOTAL_TIMES=%lu " "TIMEOUT_MS=%lu XM=%lu ALPHA=%f CUTOFF_QUANTILE=%f " "TIMEOUT_RATE=%f CLOSE_MS=%lu CLOSE_RATE=%f", (unsigned long)cbt->total_build_times, (unsigned long)cbt->timeout_ms, (unsigned long)cbt->Xm, cbt->alpha, qnt, - circuit_build_times_timeout_rate(cbt), + timeout_rate, (unsigned long)cbt->close_ms, - circuit_build_times_close_rate(cbt)); + close_rate); control_event_buildtimeout_set(type, args); |