diff options
author | Nick Mathewson <nickm@torproject.org> | 2010-06-29 18:57:50 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2010-06-29 18:57:50 -0400 |
commit | bea55766af461e4ca78a4919912f3aa9de978bdc (patch) | |
tree | 4de1f70487f1f0377e5d11a7c1329f70625ba59a /src/or/circuitbuild.c | |
parent | 1d5b2da3a8273797817747e08a3a0b6726cb060a (diff) | |
parent | 5dbf99d9ff59e69c064acda31495486635f8b844 (diff) | |
download | tor-bea55766af461e4ca78a4919912f3aa9de978bdc.tar.gz tor-bea55766af461e4ca78a4919912f3aa9de978bdc.zip |
Merge remote branch 'mikeperry/cbt-bugfixes3'
Diffstat (limited to 'src/or/circuitbuild.c')
-rw-r--r-- | src/or/circuitbuild.c | 534 |
1 files changed, 383 insertions, 151 deletions
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 417d8ec8d8..1e51ffbe76 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -20,6 +20,8 @@ #define MIN(a,b) ((a)<(b)?(a):(b)) #endif +#define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2)) + /********* START VARIABLES **********/ /** Global list of circuit build times */ // FIXME: Add this as a member for entry_guard_t instead of global? @@ -79,6 +81,32 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); +static int +circuit_build_times_disabled(void) +{ + if (unit_tests) { + return 0; + } else { + int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled", + 0); + int config_disabled = !get_options()->LearnCircuitBuildTimeout; + int dirauth_disabled = get_options()->AuthoritativeDir; + int state_disabled = (get_or_state()->LastWritten == -1); + + if (consensus_disabled || config_disabled || dirauth_disabled || + state_disabled) { + log_info(LD_CIRC, + "CircuitBuildTime learning is disabled. " + "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", + consensus_disabled, config_disabled, dirauth_disabled, + state_disabled); + return 1; + } else { + return 0; + } + } +} + static int32_t circuit_build_times_max_timeouts(void) { @@ -88,6 +116,14 @@ circuit_build_times_max_timeouts(void) } static int32_t +circuit_build_times_default_num_xm_modes(void) +{ + int32_t num = networkstatus_get_param(NULL, "cbtnummodes", + CBT_DEFAULT_NUM_XM_MODES); + return num; +} + +static int32_t circuit_build_times_min_circs_to_observe(void) { int32_t num = networkstatus_get_param(NULL, "cbtmincircs", @@ -103,6 +139,15 @@ circuit_build_times_quantile_cutoff(void) return num/100.0; } +static double +circuit_build_times_close_quantile(void) +{ + int32_t num = networkstatus_get_param(NULL, "cbtclosequantile", + CBT_DEFAULT_CLOSE_QUANTILE); + + return num/100.0; +} + static int32_t circuit_build_times_test_frequency(void) { @@ -222,7 +267,6 @@ void circuit_build_times_reset(circuit_build_times_t *cbt) { memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times)); - cbt->pre_timeouts = 0; cbt->total_build_times = 0; cbt->build_times_idx = 0; cbt->have_computed_timeout = 0; @@ -241,34 +285,18 @@ circuit_build_times_init(circuit_build_times_t *cbt) cbt->liveness.num_recent_circs = circuit_build_times_recent_circuit_count(); cbt->liveness.timeouts_after_firsthop = tor_malloc_zero(sizeof(int8_t)* cbt->liveness.num_recent_circs); - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout(); control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET); } /** - * Rewind our timeout history by n timeout positions. + * Rewind our build time history by n positions. */ static void circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) { int i = 0; - if (cbt->pre_timeouts) { - /* If we have pre-timeouts, it means we're not yet storing - * timeouts in our normal array. Only rewind the counter. */ - if (cbt->pre_timeouts > n) { - cbt->pre_timeouts -= n; - } else { - cbt->pre_timeouts = 0; - } - log_info(LD_CIRC, - "Rewound history by %d places. Current index: %d. Total: %d. " - "Pre-timeouts: %d", n, cbt->build_times_idx, - cbt->total_build_times, cbt->pre_timeouts); - - return; - } - cbt->build_times_idx -= n; cbt->build_times_idx %= CBT_NCIRCUITS_TO_OBSERVE; @@ -289,7 +317,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) } /** - * Add a new timeout value <b>time</b> to the set of build times. Time + * Add a new build time value <b>time</b> to the set of build times. Time * units are milliseconds. * * circuit_build_times <b>cbt</a> is a circular array, so loop around when @@ -298,14 +326,14 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) { - tor_assert(time <= CBT_BUILD_TIME_MAX); - if (time <= 0) { - log_warn(LD_CIRC, "Circuit build time is %u!", time); + if (time <= 0 || time > CBT_BUILD_TIME_MAX) { + log_warn(LD_BUG, "Circuit build time is too large (%u)." + "This is probably a bug.", time); + tor_fragile_assert(); return -1; } - // XXX: Probably want to demote this to debug for the release. - log_info(LD_CIRC, "Adding circuit build time %u", time); + log_debug(LD_CIRC, "Adding circuit build time %u", time); cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % CBT_NCIRCUITS_TO_OBSERVE; @@ -330,7 +358,8 @@ circuit_build_times_max(circuit_build_times_t *cbt) int i = 0; build_time_t max_build_time = 0; for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] > max_build_time) + if (cbt->circuit_build_times[i] > max_build_time + && cbt->circuit_build_times[i] != CBT_BUILD_ABANDONED) max_build_time = cbt->circuit_build_times[i]; } return max_build_time; @@ -377,7 +406,9 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, // calculate histogram for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ + if (cbt->circuit_build_times[i] == 0 + || cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) + continue; /* 0 <-> uninitialized */ c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH); histogram[c]++; @@ -387,25 +418,55 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, } /** - * Return the most frequent build time (rounded to CBT_BIN_WIDTH ms). + * Return the Pareto start-of-curve parameter Xm. * - * Ties go in favor of the slower time. + * Because we are not a true Pareto curve, we compute this as the + * weighted average of the N=3 most frequent build time bins. */ static build_time_t -circuit_build_times_mode(circuit_build_times_t *cbt) +circuit_build_times_get_xm(circuit_build_times_t *cbt) { - build_time_t i, nbins, max_bin=0; + build_time_t i, nbins; + build_time_t *nth_max_bin; + int32_t bin_counts=0; + build_time_t ret = 0; uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); + int n=0; + int num_modes = circuit_build_times_default_num_xm_modes(); + + // Only use one mode if < 1000 buildtimes. Not enough data + // for multiple. + if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE) + num_modes = 1; + + nth_max_bin = (build_time_t*)tor_malloc_zero(num_modes*sizeof(build_time_t)); for (i = 0; i < nbins; i++) { - if (histogram[i] >= histogram[max_bin]) { - max_bin = i; + if (histogram[i] >= histogram[nth_max_bin[0]]) { + nth_max_bin[0] = i; } + + for (n = 1; n < num_modes; n++) { + if (histogram[i] >= histogram[nth_max_bin[n]] && + (!histogram[nth_max_bin[n-1]] + || histogram[i] < histogram[nth_max_bin[n-1]])) { + nth_max_bin[n] = i; + } + } + } + + for (n = 0; n < num_modes; n++) { + bin_counts += histogram[nth_max_bin[n]]; + ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]]; + log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]), + histogram[nth_max_bin[n]]); } + ret /= bin_counts; tor_free(histogram); + tor_free(nth_max_bin); - return max_bin*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2; + return ret; } /** @@ -428,6 +489,12 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, *next = NULL; state->TotalBuildTimes = cbt->total_build_times; + state->CircuitBuildAbandonedCount = 0; + + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) + state->CircuitBuildAbandonedCount++; + } for (i = 0; i < nbins; i++) { // compress the histogram by skipping the blanks @@ -436,7 +503,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, line->key = tor_strdup("CircuitBuildTimeBin"); line->value = tor_malloc(25); tor_snprintf(line->value, 25, "%d %d", - i*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2, histogram[i]); + CBT_BIN_TO_MS(i), histogram[i]); next = &(line->next); } @@ -480,6 +547,43 @@ circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt, } /** + * Filter old synthetic timeouts that were created before the + * new right-censored Pareto calculation was deployed. + * + * Once all clients before 0.2.1.13-alpha are gone, this code + * will be unused. + */ +static int +circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) +{ + int num_filtered=0, i=0; + double timeout_rate = 0; + build_time_t max_timeout = 0; + + timeout_rate = circuit_build_times_timeout_rate(cbt); + max_timeout = (build_time_t)cbt->close_ms; + + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] > max_timeout) { + build_time_t replaced = cbt->circuit_build_times[i]; + num_filtered++; + cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED; + + log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, + cbt->circuit_build_times[i]); + } + } + + log_info(LD_CIRC, + "We had %d timeouts out of %d build times, " + "and filtered %d above the max of %u", + (int)(cbt->total_build_times*timeout_rate), + cbt->total_build_times, num_filtered, max_timeout); + + return num_filtered; +} + +/** * Load histogram from <b>state</b>, shuffling the resulting array * after we do so. Use this result to estimate parameters and * calculate the timeout. @@ -493,12 +597,18 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0; uint32_t loaded_cnt = 0, N = 0; config_line_t *line; - int i; - build_time_t *loaded_times = tor_malloc(sizeof(build_time_t) - * state->TotalBuildTimes); + unsigned int i; + build_time_t *loaded_times; circuit_build_times_init(cbt); *msg = NULL; + if (circuit_build_times_disabled()) { + return 0; + } + + /* build_time_t 0 means uninitialized */ + loaded_times = tor_malloc_zero(sizeof(build_time_t)*state->TotalBuildTimes); + for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t *args = smartlist_create(); smartlist_split_string(args, line->value, " ", @@ -534,7 +644,8 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, break; } - if (loaded_cnt+count > state->TotalBuildTimes) { + if (loaded_cnt+count+state->CircuitBuildAbandonedCount + > state->TotalBuildTimes) { log_warn(LD_CIRC, "Too many build times in state file. " "Stopping short before %d", @@ -553,10 +664,17 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, } } + log_info(LD_CIRC, + "Adding %d timeouts.", state->CircuitBuildAbandonedCount); + for (i=0; i < state->CircuitBuildAbandonedCount; i++) { + loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED; + } + if (loaded_cnt != state->TotalBuildTimes) { log_warn(LD_CIRC, "Corrupt state file? Build times count mismatch. " - "Read %d, file says %d", loaded_cnt, state->TotalBuildTimes); + "Read %d times, but file says %d", loaded_cnt, + state->TotalBuildTimes); } circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt); @@ -572,7 +690,13 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, tot_values, cbt->total_build_times, N); tor_assert(cbt->total_build_times == tot_values); tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE); + circuit_build_times_set_timeout(cbt); + + if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) { + circuit_build_times_filter_timeouts(cbt); + } + tor_free(loaded_times); return *msg ? -1 : 0; } @@ -591,12 +715,15 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) { build_time_t *x=cbt->circuit_build_times; double a = 0; - int n=0,i=0; + int n=0,i=0,abandoned_count=0; + build_time_t max_time=0; /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ /* We sort of cheat here and make our samples slightly more pareto-like * and less frechet-like. */ - cbt->Xm = circuit_build_times_mode(cbt); + cbt->Xm = circuit_build_times_get_xm(cbt); + + tor_assert(cbt->Xm > 0); for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { if (!x[i]) { @@ -605,8 +732,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) if (x[i] < cbt->Xm) { a += tor_mathlog(cbt->Xm); + } else if (x[i] == CBT_BUILD_ABANDONED) { + abandoned_count++; } else { a += tor_mathlog(x[i]); + if (x[i] > max_time) + max_time = x[i]; } n++; } @@ -617,8 +748,15 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) } tor_assert(n==cbt->total_build_times); + tor_assert(max_time > 0); + + a += abandoned_count*tor_mathlog(max_time); + a -= n*tor_mathlog(cbt->Xm); - a = n/a; + // Estimator comes from Eq #4 in: + // "Bayesian estimation based on trimmed samples from Pareto populations" + // by Arturo J. Fernández. We are right-censored only. + a = (n-abandoned_count)/a; cbt->alpha = a; } @@ -698,35 +836,6 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, return ret; } -/** Generate points in [cutoff, 1.0) on the CDF. */ -void -circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, - double quantile_cutoff) -{ - // XXX: This may be failing when the number of samples is small? - // Keep getting values for the largest timeout bucket over and over - // again... Probably because alpha is very very large in that case.. - build_time_t gentime = circuit_build_times_generate_sample(cbt, - quantile_cutoff, CBT_MAX_SYNTHETIC_QUANTILE); - - if (gentime < (build_time_t)tor_lround(cbt->timeout_ms)) { - log_warn(LD_CIRC, - "Generated a synthetic timeout LESS than the current timeout: " - "%ums vs %lfms using Xm: %d a: %lf, q: %lf", - gentime, cbt->timeout_ms, cbt->Xm, cbt->alpha, quantile_cutoff); - } else if (gentime > CBT_BUILD_TIME_MAX) { - log_info(LD_CIRC, - "Generated a synthetic timeout larger than the max: %u", - gentime); - gentime = CBT_BUILD_TIME_MAX; - } else { - log_info(LD_CIRC, "Generated synthetic circuit build time %u for timeout", - gentime); - } - - circuit_build_times_add_time(cbt, gentime); -} - /** * Estimate an initial alpha parameter by solving the quantile * function with a quantile point and a specific timeout value. @@ -749,33 +858,6 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, } /** - * Generate synthetic timeout values for the timeouts - * that have happened before we estimated our parameters. - */ -static void -circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) -{ - /* Store a timeout as a random position past the current - * cutoff on the pareto curve */ - if (cbt->pre_timeouts) { - double timeout_quantile = 1.0- - ((double)cbt->pre_timeouts)/ - (cbt->pre_timeouts+cbt->total_build_times); - /* Make sure it doesn't exceed the synthetic max */ - timeout_quantile *= CBT_MAX_SYNTHETIC_QUANTILE; - cbt->Xm = circuit_build_times_mode(cbt); - tor_assert(cbt->Xm > 0); - /* Use current timeout to get an estimate on alpha */ - circuit_build_times_initial_alpha(cbt, timeout_quantile, - cbt->timeout_ms); - while (cbt->pre_timeouts-- != 0) { - circuit_build_times_add_timeout_worker(cbt, timeout_quantile); - } - cbt->pre_timeouts = 0; - } -} - -/** * Returns true if we need circuits to be built */ int @@ -825,16 +907,27 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt) } /** - * A circuit just timed out. If there has been no recent network activity - * at all, but this circuit was launched back when we thought the network - * was live, increment the number of "nonlive" circuit timeouts. - * - * Also distinguish between whether it failed before the first hop - * and record that in our history for later deciding if the network has - * changed. + * A circuit just timed out. If it failed after the first hop, record it + * in our history for later deciding if the network speed has changed. */ static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, + int did_onehop) +{ + if (did_onehop) { + cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1; + cbt->liveness.after_firsthop_idx++; + cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs; + } +} + +/** + * A circuit was just forcibly closed. If there has been no recent network + * activity at all, but this circuit was launched back when we thought the + * network was live, increment the number of "nonlive" circuit timeouts. + */ +static void +circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { time_t now = time(NULL); @@ -843,16 +936,21 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt, * entire existence during a time where we have had no network activity. * * Also double check that it is a valid timeout after we have possibly - * just recently reset cbt->timeout_ms. + * just recently reset cbt->close_ms. + * + * We use close_ms here because timeouts aren't actually counted as timeouts + * until close_ms elapses. */ if (cbt->liveness.network_last_live <= start_time && - start_time <= (now - cbt->timeout_ms/1000.0)) { + start_time <= (now - cbt->close_ms/1000.0)) { + if (did_onehop) { + log_warn(LD_BUG, + "Circuit somehow completed a hop while the network was " + "not live. Network was last live at %ld, but circuit launched " + "at %ld. It's now %ld.", cbt->liveness.network_last_live, + start_time, now); + } cbt->liveness.nonlive_timeouts++; - } else if (did_onehop) { - /* Count a one-hop timeout */ - cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1; - cbt->liveness.after_firsthop_idx++; - cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs; } } @@ -888,17 +986,22 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) "Temporarily raising timeout to %lds.", (long int)(now - cbt->liveness.network_last_live), tor_lround(circuit_build_times_get_initial_timeout()/1000)); - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); - cbt->liveness.net_suspended = 1; + cbt->liveness.suspended_timeout = cbt->timeout_ms; + cbt->liveness.suspended_close_timeout = cbt->close_ms; + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED); } return 0; - } else if (cbt->liveness.net_suspended) { + } else if (cbt->liveness.suspended_timeout > 0) { log_notice(LD_CIRC, "Network activity has resumed. " "Resuming circuit timeout calculations."); - cbt->liveness.net_suspended = 0; + cbt->timeout_ms = cbt->liveness.suspended_timeout; + cbt->close_ms = cbt->liveness.suspended_close_timeout; + cbt->liveness.suspended_timeout = 0; + cbt->liveness.suspended_close_timeout = 0; control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME); } @@ -942,9 +1045,17 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) /* Check to see if this has happened before. If so, double the timeout * to give people on abysmally bad network connections a shot at access */ if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) { - cbt->timeout_ms *= 2; + if (cbt->timeout_ms > INT32_MAX/2 || cbt->close_ms > INT32_MAX/2) { + log_warn(LD_CIRC, "Insanely large circuit build timeout value. " + "(timeout = %lfmsec, close = %lfmsec)", + cbt->timeout_ms, cbt->close_ms); + } else { + cbt->timeout_ms *= 2; + cbt->close_ms *= 2; + } } else { - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); } control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET); @@ -959,77 +1070,180 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) } /** + * Count the number of timeouts in a set of cbt data. + */ +double +circuit_build_times_timeout_rate(const circuit_build_times_t *cbt) +{ + int i=0,timeouts=0; + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] >= cbt->timeout_ms) { + timeouts++; + } + } + + if (!cbt->total_build_times) + return 0; + + return ((double)timeouts)/cbt->total_build_times; +} + +/** + * Count the number of closed circuits in a set of cbt data. + */ +double +circuit_build_times_close_rate(const circuit_build_times_t *cbt) +{ + int i=0,closed=0; + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) { + closed++; + } + } + + if (!cbt->total_build_times) + return 0; + + return ((double)closed)/cbt->total_build_times; +} + +/** * Store a timeout as a synthetic value. * * Returns true if the store was successful and we should possibly * update our timeout estimate. */ int -circuit_build_times_add_timeout(circuit_build_times_t *cbt, +circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { - circuit_build_times_network_timeout(cbt, did_onehop, start_time); + if (circuit_build_times_disabled()) { + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); + return 0; + } + + /* Record this force-close to help determine if the network is dead */ + circuit_build_times_network_close(cbt, did_onehop, start_time); /* Only count timeouts if network is live.. */ if (!circuit_build_times_network_check_live(cbt)) { return 0; } - /* If there are a ton of timeouts, we should reduce - * the circuit build timeout */ - if (circuit_build_times_network_check_changed(cbt)) { - return 0; - } + circuit_build_times_add_time(cbt, CBT_BUILD_ABANDONED); + return 1; +} - if (!cbt->have_computed_timeout) { - /* Store a timeout before we have enough data */ - cbt->pre_timeouts++; - log_info(LD_CIRC, - "Not enough circuits yet to calculate a new build timeout." - " Need %d more.", circuit_build_times_min_circs_to_observe() - - cbt->total_build_times); - return 0; +/** + * Update timeout counts to determine if we need to expire + * our build time history due to excessive timeouts. + */ +void +circuit_build_times_count_timeout(circuit_build_times_t *cbt, + int did_onehop) +{ + if (circuit_build_times_disabled()) { + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); + return; } - circuit_build_times_count_pretimeouts(cbt); - circuit_build_times_add_timeout_worker(cbt, - circuit_build_times_quantile_cutoff()); + circuit_build_times_network_timeout(cbt, did_onehop); - return 1; + /* If there are a ton of timeouts, we should reset + * the circuit build timeout. + */ + circuit_build_times_network_check_changed(cbt); } /** * Estimate a new timeout based on history and set our timeout * variable accordingly. */ -void -circuit_build_times_set_timeout(circuit_build_times_t *cbt) +static int +circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) { if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) { - return; + return 0; } - circuit_build_times_count_pretimeouts(cbt); circuit_build_times_update_alpha(cbt); cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt, circuit_build_times_quantile_cutoff()); + cbt->close_ms = circuit_build_times_calculate_timeout(cbt, + circuit_build_times_close_quantile()); + + /* Sometimes really fast guard nodes give us such a steep curve + * that this ends up being not that much greater than timeout_ms. + * Make it be at least 1 min to handle this case. */ + cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout()); + cbt->have_computed_timeout = 1; + return 1; +} + +/** + * Exposed function to compute a new timeout. Dispatches events and + * also filters out extremely high timeout values. + */ +void +circuit_build_times_set_timeout(circuit_build_times_t *cbt) +{ + long prev_timeout = tor_lround(cbt->timeout_ms/1000); + double timeout_rate; + + if (!circuit_build_times_set_timeout_worker(cbt)) + return; if (cbt->timeout_ms < circuit_build_times_min_timeout()) { log_warn(LD_CIRC, "Set buildtimeout to low value %lfms. Setting to %dms", cbt->timeout_ms, circuit_build_times_min_timeout()); cbt->timeout_ms = circuit_build_times_min_timeout(); + if (cbt->close_ms < cbt->timeout_ms) { + /* This shouldn't happen because of MAX() in timeout_worker above, + * but doing it just in case */ + cbt->close_ms = circuit_build_times_initial_timeout(); + } } control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED); - log_info(LD_CIRC, - "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf) " - "based on %d circuit times", tor_lround(cbt->timeout_ms/1000), - cbt->timeout_ms, cbt->Xm, cbt->alpha, cbt->total_build_times); + timeout_rate = circuit_build_times_timeout_rate(cbt); + + if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) { + log_notice(LD_CIRC, + "Based on %d circuit times, it looks like we don't need to " + "wait so long for circuits to finish. We will now assume a " + "circuit is too slow to use after waiting %ld seconds.", + cbt->total_build_times, + tor_lround(cbt->timeout_ms/1000)); + log_info(LD_CIRC, + "Circuit timeout data: %lfms, %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, + timeout_rate); + } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) { + log_notice(LD_CIRC, + "Based on %d circuit times, it looks like we need to wait " + "longer for circuits to finish. We will now assume a " + "circuit is too slow to use after waiting %ld seconds.", + cbt->total_build_times, + tor_lround(cbt->timeout_ms/1000)); + log_info(LD_CIRC, + "Circuit timeout data: %lfms, %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, + timeout_rate); + } else { + log_info(LD_CIRC, + "Set circuit build timeout to %lds (%lfms, %lfms, Xm: %d, a: %lf," + " r: %lf) based on %d circuit times", + tor_lround(cbt->timeout_ms/1000), + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate, + cbt->total_build_times); + } } /** Iterate over values of circ_id, starting from conn-\>next_circ_id, @@ -1620,11 +1834,25 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) long timediff; tor_gettimeofday(&end); timediff = tv_mdiff(&circ->_base.highres_created, &end); - if (timediff > INT32_MAX) - timediff = INT32_MAX; - circuit_build_times_add_time(&circ_times, (build_time_t)timediff); - circuit_build_times_network_circ_success(&circ_times); - circuit_build_times_set_timeout(&circ_times); + /* + * If the circuit build time is much greater than we would have cut + * it off at, we probably had a suspend event along this codepath, + * and we should discard the value. + */ + if (timediff < 0 || timediff > 2*circ_times.close_ms+1000) { + log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. " + "Assuming clock jump.", timediff); + } else if (!circuit_build_times_disabled()) { + /* Don't count circuit times if the network was not live */ + if (circuit_build_times_network_check_live(&circ_times)) { + circuit_build_times_add_time(&circ_times, (build_time_t)timediff); + circuit_build_times_set_timeout(&circ_times); + } + + if (circ->_base.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + circuit_build_times_network_circ_success(&circ_times); + } + } } log_info(LD_CIRC,"circuit built!"); circuit_reset_failure_count(0); @@ -1646,6 +1874,10 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) } circuit_rep_hist_note_result(circ); circuit_has_opened(circ); /* do other actions as necessary */ + + /* We're done with measurement circuits here. Just close them */ + if (circ->_base.purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) + circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_FINISHED); return 0; } |