From cc2a48f1be7a3bcd7e31b969a62ed1279c521682 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 4 May 2010 14:42:37 -0700 Subject: Bug 1335: Alter Xm calculation to be weighted avg of top N=3 modes. In my state files, I was seeing several peaks, probably due to different guards having different latency. This change is meant to better capture this behavior and generate more reasonable timeouts when it happens. It is improving the timeout values for my collection of state files. --- src/or/circuitbuild.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- src/or/or.h | 3 +++ 2 files changed, 43 insertions(+), 10 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 233d60f15c..98f6baf2bd 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -20,6 +20,8 @@ #define MIN(a,b) ((a)<(b)?(a):(b)) #endif +#define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2)) + /********* START VARIABLES **********/ /** Global list of circuit build times */ // FIXME: Add this as a member for entry_guard_t instead of global? @@ -387,25 +389,53 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, } /** - * Return the most frequent build time (rounded to CBT_BIN_WIDTH ms). + * Return the Pareto start-of-curve parameter Xm. * - * Ties go in favor of the slower time. + * Because we are not a true Pareto curve, we compute this as the + * weighted average of the N=3 most frequent build time bins. */ static build_time_t -circuit_build_times_mode(circuit_build_times_t *cbt) +circuit_build_times_get_xm(circuit_build_times_t *cbt) { - build_time_t i, nbins, max_bin=0; + build_time_t i, nbins; + build_time_t nth_max_bin[CBT_NUM_XM_MODES]; + int32_t bin_counts=0; + build_time_t ret = 0; uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); + int n=0; + int num_modes = CBT_NUM_XM_MODES; + + // Only use one mode if < 1000 buildtimes. Not enough data + // for multiple. + if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE) + num_modes = 1; + memset(nth_max_bin, 0, sizeof(nth_max_bin)); for (i = 0; i < nbins; i++) { - if (histogram[i] >= histogram[max_bin]) { - max_bin = i; + if (histogram[i] >= histogram[nth_max_bin[0]]) { + nth_max_bin[0] = i; } + + for (n = 1; n < num_modes; n++) { + if (histogram[i] >= histogram[nth_max_bin[n]] && + (!histogram[nth_max_bin[n-1]] + || histogram[i] < histogram[nth_max_bin[n-1]])) { + nth_max_bin[n] = i; + } + } + } + + for (n = 0; n < num_modes; n++) { + bin_counts += histogram[nth_max_bin[n]]; + ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]]; + log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]), + histogram[nth_max_bin[n]]); } + ret /= bin_counts; tor_free(histogram); - return max_bin*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2; + return ret; } /** @@ -436,7 +466,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, line->key = tor_strdup("CircuitBuildTimeBin"); line->value = tor_malloc(25); tor_snprintf(line->value, 25, "%d %d", - i*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2, histogram[i]); + CBT_BIN_TO_MS(i), histogram[i]); next = &(line->next); } @@ -596,7 +626,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ /* We sort of cheat here and make our samples slightly more pareto-like * and less frechet-like. */ - cbt->Xm = circuit_build_times_mode(cbt); + cbt->Xm = circuit_build_times_get_xm(cbt); for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { if (!x[i]) { @@ -763,7 +793,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) (cbt->pre_timeouts+cbt->total_build_times); /* Make sure it doesn't exceed the synthetic max */ timeout_quantile *= CBT_MAX_SYNTHETIC_QUANTILE; - cbt->Xm = circuit_build_times_mode(cbt); + cbt->Xm = circuit_build_times_get_xm(cbt); tor_assert(cbt->Xm > 0); /* Use current timeout to get an estimate on alpha */ circuit_build_times_initial_alpha(cbt, timeout_quantile, diff --git a/src/or/or.h b/src/or/or.h index 9c613d28d1..0d35de1be0 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3023,6 +3023,9 @@ void entry_guards_free_all(void); /** Width of the histogram bins in milliseconds */ #define CBT_BIN_WIDTH ((build_time_t)50) +/** Number of modes to use in the weighted-avg computation of Xm */ +#define CBT_NUM_XM_MODES (3) + /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; #define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) -- cgit v1.2.3-54-g00ecf From 3bbc3e21375d8d792a25047e4219604efe377f9e Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 7 May 2010 16:19:44 -0700 Subject: Bug 1335: Implement filtering step to remove+prevent high timeouts. This is for the other issue we saw in Bug 1335. A large number of high timeouts were causing the timeout calculation to slowly drift upwards, especially in conditions of load. This fix repeatedly regenerates all of our synthetic timeouts whenever the timeout changes, to try to prevent drift. It also lowers the timeout cap to help for some cases of Bug 1245, where some timeout values were so large that we ended up allocating a ton of scratch memory to count the histogram bins. The downside is that lowering this cap is affecting our timeout rate. Unfortunately, the buildtimeout quantile is now higher than the actual completion rate by what appears to be about 7-10%, which probably represents the skew in the distribution due to lowering this synthetic cap. --- src/or/circuitbuild.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++----- src/or/or.h | 2 +- 2 files changed, 72 insertions(+), 8 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 98f6baf2bd..ddc86191f0 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -1029,15 +1029,52 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, return 1; } +static int +circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) +{ + int num_filtered=0, i=0; + build_time_t max_timeout; + + /* If we replace high timeouts when the curve is really steep + * (alpha is above ~1.5), we can end up with a lot of timeouts clustered + * really close to the current timeout value, which makes the next + * alpha even higher, which makes us filter out more, and this becomes + * progressively worse until the timeout *is* Xm. Filtering should + * only be done on really shallow curves anyway. */ + if (cbt->alpha > 1.5) { + return 0; + } + + max_timeout = tor_lround(circuit_build_times_calculate_timeout(cbt, + CBT_MAX_SYNTHETIC_QUANTILE)); + + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] > max_timeout) { + build_time_t replaced = cbt->circuit_build_times[i]; + num_filtered++; + cbt->circuit_build_times[i] = + MIN(circuit_build_times_generate_sample(cbt, + circuit_build_times_quantile_cutoff(), + CBT_MAX_SYNTHETIC_QUANTILE), + CBT_BUILD_TIME_MAX); + + log_info(LD_CIRC, "Replaced timeout %d with %d", replaced, + cbt->circuit_build_times[i]); + } + } + + return num_filtered; +} + /** * Estimate a new timeout based on history and set our timeout * variable accordingly. */ -void -circuit_build_times_set_timeout(circuit_build_times_t *cbt) +static int +circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) { if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) { - return; + return 0; } circuit_build_times_count_pretimeouts(cbt); @@ -1045,8 +1082,27 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt, circuit_build_times_quantile_cutoff()); - cbt->have_computed_timeout = 1; + return 1; +} + +/** + * Exposed function to compute a new timeout. Dispatches events and + * also filters out extremely high timeout values. + */ +void +circuit_build_times_set_timeout(circuit_build_times_t *cbt) +{ + int filtered=0,timeouts=0,i=0; + + if (!circuit_build_times_set_timeout_worker(cbt)) + return; + + if ((filtered = circuit_build_times_filter_timeouts(cbt)) > 0) { + log_info(LD_CIRC, "Filtered out %d timeouts. Recomputing timeout.", + filtered); + circuit_build_times_set_timeout_worker(cbt); + } if (cbt->timeout_ms < circuit_build_times_min_timeout()) { log_warn(LD_CIRC, "Set buildtimeout to low value %lfms. Setting to %dms", @@ -1056,10 +1112,18 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED); + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] >= cbt->timeout_ms) { + timeouts++; + } + } + log_info(LD_CIRC, - "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf) " - "based on %d circuit times", tor_lround(cbt->timeout_ms/1000), - cbt->timeout_ms, cbt->Xm, cbt->alpha, cbt->total_build_times); + "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf, r: " + "%lf) based on %d circuit times", tor_lround(cbt->timeout_ms/1000), + cbt->timeout_ms, cbt->Xm, cbt->alpha, + 1.0-((double)timeouts)/cbt->total_build_times, + cbt->total_build_times); } /** Iterate over values of circ_id, starting from conn-\>next_circ_id, diff --git a/src/or/or.h b/src/or/or.h index 0d35de1be0..7402f4a385 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3018,7 +3018,7 @@ void entry_guards_free_all(void); /** Maximum quantile to use to generate synthetic timeouts. * We want to stay a bit short of 1.0, because longtail is * loooooooooooooooooooooooooooooooooooooooooooooooooooong. */ -#define CBT_MAX_SYNTHETIC_QUANTILE 0.985 +#define CBT_MAX_SYNTHETIC_QUANTILE 0.90 /** Width of the histogram bins in milliseconds */ #define CBT_BIN_WIDTH ((build_time_t)50) -- cgit v1.2.3-54-g00ecf From 835ab531028911fbec6e9706a1ba33bcab37f102 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 7 May 2010 14:43:56 -0700 Subject: Add a TIMEOUT_RATE keyword to buildtimeout event. --- doc/spec/control-spec.txt | 3 ++- src/or/circuitbuild.c | 51 ++++++++++++++++++++++++++++++----------------- src/or/control.c | 6 ++++-- src/or/or.h | 1 + 4 files changed, 40 insertions(+), 21 deletions(-) (limited to 'src/or') diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt index b60baba052..1938fa226c 100644 --- a/doc/spec/control-spec.txt +++ b/doc/spec/control-spec.txt @@ -1670,13 +1670,14 @@ The syntax is: "650" SP "BUILDTIMEOUT_SET" SP Type SP "TOTAL_TIMES=" Total SP "TIMEOUT_MS=" Timeout SP "XM=" Xm SP "ALPHA=" Alpha SP - "CUTOFF_QUANTILE=" Quantile CRLF + "CUTOFF_QUANTILE=" Quantile SP "TIMEOUT_RATE=" Rate CRLF Type = "COMPUTED" / "RESET" / "SUSPENDED" / "DISCARD" / "RESUME" Total = Integer count of timeouts stored Timeout = Integer timeout in milliseconds Xm = Estimated integer Pareto parameter Xm in milliseconds Alpha = Estimated floating point Paredo paremter alpha Quantile = Floating point CDF quantile cutoff point for this timeout + Rate = Floating point ratio of circuits that timeout A new circuit build timeout time has been set. If Type is "COMPUTED", Tor has computed the value based on historical data. If Type is "RESET", diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index ddc86191f0..b54952450a 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -306,8 +306,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) return -1; } - // XXX: Probably want to demote this to debug for the release. - log_info(LD_CIRC, "Adding circuit build time %u", time); + log_debug(LD_CIRC, "Adding circuit build time %u", time); cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % CBT_NCIRCUITS_TO_OBSERVE; @@ -733,9 +732,6 @@ void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff) { - // XXX: This may be failing when the number of samples is small? - // Keep getting values for the largest timeout bucket over and over - // again... Probably because alpha is very very large in that case.. build_time_t gentime = circuit_build_times_generate_sample(cbt, quantile_cutoff, CBT_MAX_SYNTHETIC_QUANTILE); @@ -988,6 +984,22 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) return 1; } +/** + * Count the number of timeouts in a set of cbt data. + */ +double +circuit_build_times_timeout_rate(const circuit_build_times_t *cbt) +{ + int i=0,timeouts=0; + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] >= cbt->timeout_ms) { + timeouts++; + } + } + + return ((double)timeouts)/cbt->total_build_times; +} + /** * Store a timeout as a synthetic value. * @@ -1024,7 +1036,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, circuit_build_times_count_pretimeouts(cbt); circuit_build_times_add_timeout_worker(cbt, - circuit_build_times_quantile_cutoff()); + circuit_build_times_quantile_cutoff()); return 1; } @@ -1033,7 +1045,8 @@ static int circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) { int num_filtered=0, i=0; - build_time_t max_timeout; + double timeout_rate = 0; + build_time_t max_timeout = 0; /* If we replace high timeouts when the curve is really steep * (alpha is above ~1.5), we can end up with a lot of timeouts clustered @@ -1045,6 +1058,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) return 0; } + timeout_rate = circuit_build_times_timeout_rate(cbt); max_timeout = tor_lround(circuit_build_times_calculate_timeout(cbt, CBT_MAX_SYNTHETIC_QUANTILE)); @@ -1054,15 +1068,21 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) num_filtered++; cbt->circuit_build_times[i] = MIN(circuit_build_times_generate_sample(cbt, - circuit_build_times_quantile_cutoff(), - CBT_MAX_SYNTHETIC_QUANTILE), - CBT_BUILD_TIME_MAX); + circuit_build_times_quantile_cutoff(), + CBT_MAX_SYNTHETIC_QUANTILE), + CBT_BUILD_TIME_MAX); - log_info(LD_CIRC, "Replaced timeout %d with %d", replaced, + log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, cbt->circuit_build_times[i]); } } + log_info(LD_CIRC, + "We had %d timeouts out of %d build times, " + "and filtered %d above the max of %u", + (int)(cbt->total_build_times*timeout_rate), + cbt->total_build_times, num_filtered, max_timeout); + return num_filtered; } @@ -1094,13 +1114,12 @@ void circuit_build_times_set_timeout(circuit_build_times_t *cbt) { int filtered=0,timeouts=0,i=0; + double timeout_rate; if (!circuit_build_times_set_timeout_worker(cbt)) return; if ((filtered = circuit_build_times_filter_timeouts(cbt)) > 0) { - log_info(LD_CIRC, "Filtered out %d timeouts. Recomputing timeout.", - filtered); circuit_build_times_set_timeout_worker(cbt); } @@ -1112,11 +1131,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED); - for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] >= cbt->timeout_ms) { - timeouts++; - } - } + timeout_rate = circuit_build_times_timeout_rate(cbt); log_info(LD_CIRC, "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf, r: " diff --git a/src/or/control.c b/src/or/control.c index 5797edfdce..b5cc6c7b6d 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -3529,10 +3529,12 @@ control_event_buildtimeout_set(const circuit_build_times_t *cbt, send_control_event(EVENT_BUILDTIMEOUT_SET, ALL_FORMATS, "650 BUILDTIMEOUT_SET %s TOTAL_TIMES=%lu " - "TIMEOUT_MS=%lu XM=%lu ALPHA=%lf CUTOFF_QUANTILE=%lf\r\n", + "TIMEOUT_MS=%lu XM=%lu ALPHA=%lf CUTOFF_QUANTILE=%lf " + "TIMEOUT_RATE=%lf\r\n", type_string, (unsigned long)cbt->total_build_times, (unsigned long)cbt->timeout_ms, - (unsigned long)cbt->Xm, cbt->alpha, qnt); + (unsigned long)cbt->Xm, cbt->alpha, qnt, + circuit_build_times_timeout_rate(cbt)); return 0; } diff --git a/src/or/or.h b/src/or/or.h index 7402f4a385..c4cafcb12a 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3152,6 +3152,7 @@ int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt); void circuit_build_times_init(circuit_build_times_t *cbt); void circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, networkstatus_t *ns); +double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt); #ifdef CIRCUIT_PRIVATE double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, -- cgit v1.2.3-54-g00ecf From eecdd94dec6cdb3515b0500a1d15170a199bee41 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 6 May 2010 14:53:05 -0700 Subject: Add consensus parameter for max synthetic quantile. In case we decide that the timeout rate is now too high due to our change of the max synthetic quantile value, this consensus parameter will allow us to restore it to the previous value. --- src/or/circuitbuild.c | 16 ++++++++++++---- src/or/or.h | 4 ++-- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index b54952450a..d7245c5caf 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -105,6 +105,14 @@ circuit_build_times_quantile_cutoff(void) return num/100.0; } +static double +circuit_build_times_max_synthetic_quantile(void) +{ + int32_t num = networkstatus_get_param(NULL, "cbtmaxsynthquantile", + CBT_DEFAULT_MAX_SYNTHETIC_QUANTILE); + return num/100.0; +} + static int32_t circuit_build_times_test_frequency(void) { @@ -733,7 +741,7 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff) { build_time_t gentime = circuit_build_times_generate_sample(cbt, - quantile_cutoff, CBT_MAX_SYNTHETIC_QUANTILE); + quantile_cutoff, circuit_build_times_max_synthetic_quantile()); if (gentime < (build_time_t)tor_lround(cbt->timeout_ms)) { log_warn(LD_CIRC, @@ -788,7 +796,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) ((double)cbt->pre_timeouts)/ (cbt->pre_timeouts+cbt->total_build_times); /* Make sure it doesn't exceed the synthetic max */ - timeout_quantile *= CBT_MAX_SYNTHETIC_QUANTILE; + timeout_quantile *= circuit_build_times_max_synthetic_quantile(); cbt->Xm = circuit_build_times_get_xm(cbt); tor_assert(cbt->Xm > 0); /* Use current timeout to get an estimate on alpha */ @@ -1060,7 +1068,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) timeout_rate = circuit_build_times_timeout_rate(cbt); max_timeout = tor_lround(circuit_build_times_calculate_timeout(cbt, - CBT_MAX_SYNTHETIC_QUANTILE)); + circuit_build_times_max_synthetic_quantile())); for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] > max_timeout) { @@ -1069,7 +1077,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) cbt->circuit_build_times[i] = MIN(circuit_build_times_generate_sample(cbt, circuit_build_times_quantile_cutoff(), - CBT_MAX_SYNTHETIC_QUANTILE), + circuit_build_times_max_synthetic_quantile()), CBT_BUILD_TIME_MAX); log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, diff --git a/src/or/or.h b/src/or/or.h index c4cafcb12a..c8886cff1e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3016,9 +3016,9 @@ void entry_guards_free_all(void); #define CBT_NCIRCUITS_TO_OBSERVE 1000 /** Maximum quantile to use to generate synthetic timeouts. - * We want to stay a bit short of 1.0, because longtail is + * We want to stay a bit short of 100, because longtail is * loooooooooooooooooooooooooooooooooooooooooooooooooooong. */ -#define CBT_MAX_SYNTHETIC_QUANTILE 0.90 +#define CBT_DEFAULT_MAX_SYNTHETIC_QUANTILE 90 /** Width of the histogram bins in milliseconds */ #define CBT_BIN_WIDTH ((build_time_t)50) -- cgit v1.2.3-54-g00ecf From e40e35507e91e8724a37ef2e6bf5828025f2dbe5 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 4 May 2010 12:56:47 -0700 Subject: Bump timeout calculation message to notice when timeout changes. --- src/or/circuitbuild.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index d7245c5caf..d597b752ad 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -1121,7 +1121,8 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) void circuit_build_times_set_timeout(circuit_build_times_t *cbt) { - int filtered=0,timeouts=0,i=0; + long prev_timeout = tor_lround(cbt->timeout_ms/1000); + int filtered=0; double timeout_rate; if (!circuit_build_times_set_timeout_worker(cbt)) @@ -1141,12 +1142,34 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) timeout_rate = circuit_build_times_timeout_rate(cbt); - log_info(LD_CIRC, - "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf, r: " - "%lf) based on %d circuit times", tor_lround(cbt->timeout_ms/1000), - cbt->timeout_ms, cbt->Xm, cbt->alpha, - 1.0-((double)timeouts)/cbt->total_build_times, - cbt->total_build_times); + if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) { + log_notice(LD_CIRC, + "Based on %d cicuit times, it looks like we don't need to " + "wait so long for circuits to finish. We will now assume a " + "circuit is too slow to use after waiting %ld seconds.", + cbt->total_build_times, + tor_lround(cbt->timeout_ms/1000)); + log_info(LD_CIRC, + "Circuit timeout data: %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate); + } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) { + log_notice(LD_CIRC, + "Based on %d cicuit times, it looks like we need to wait " + "wait longer for circuits to finish. We will now assume a " + "circuit is too slow to use after waiting %ld seconds.", + cbt->total_build_times, + tor_lround(cbt->timeout_ms/1000)); + log_info(LD_CIRC, + "Circuit timeout data: %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate); + } else { + log_info(LD_CIRC, + "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf, " + "r: %lf) based on %d circuit times", + tor_lround(cbt->timeout_ms/1000), + cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate, + cbt->total_build_times); + } } /** Iterate over values of circ_id, starting from conn-\>next_circ_id, -- cgit v1.2.3-54-g00ecf From 728e946efd87d5cd0a9ff073eeeb7b4fe9c3c0db Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 7 May 2010 15:42:57 -0700 Subject: Bug 1245: Ignore negative and large timeouts. This should prevent some asserts and storage of incorrect build times for the cases where Tor is suspended during a circuit construction, or just after completing a circuit. The idea is that if the circuit build time is much greater than we would have cut it off at, we probably had a suspend event along this codepath, and we should discard the value. --- src/or/circuitbuild.c | 22 +++++++++++++++------- src/or/circuituse.c | 15 +++++++++++++-- 2 files changed, 28 insertions(+), 9 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index d597b752ad..0840e304f1 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -308,9 +308,9 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) { - tor_assert(time <= CBT_BUILD_TIME_MAX); - if (time <= 0) { + if (time <= 0 || time > CBT_BUILD_TIME_MAX) { log_warn(LD_CIRC, "Circuit build time is %u!", time); + tor_fragile_assert(); return -1; } @@ -1760,11 +1760,19 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) long timediff; tor_gettimeofday(&end); timediff = tv_mdiff(&circ->_base.highres_created, &end); - if (timediff > INT32_MAX) - timediff = INT32_MAX; - circuit_build_times_add_time(&circ_times, (build_time_t)timediff); - circuit_build_times_network_circ_success(&circ_times); - circuit_build_times_set_timeout(&circ_times); + /* + * If the circuit build time is much greater than we would have cut + * it off at, we probably had a suspend event along this codepath, + * and we should discard the value. + */ + if (timediff < 0 || timediff > 2*circ_times.timeout_ms+1000) { + log_notice(LD_CIRC, "Strange value for circuit build time: %ld. " + "Assuming clock jump.", timediff); + } else { + circuit_build_times_add_time(&circ_times, (build_time_t)timediff); + circuit_build_times_network_circ_success(&circ_times); + circuit_build_times_set_timeout(&circ_times); + } } log_info(LD_CIRC,"circuit built!"); circuit_reset_failure_count(0); diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 7e47e60559..e7d10a22f2 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -350,9 +350,20 @@ circuit_expire_building(time_t now) } else { /* circuit not open, consider recording failure as timeout */ int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath && TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_OPEN; - if (circuit_build_times_add_timeout(&circ_times, first_hop_succeeded, - victim->timestamp_created)) + /* + * If the circuit build time is much greater than we would have cut + * it off at, we probably had a suspend event along this codepath, + * and we should discard the value. + */ + if (now - victim->timestamp_created > (2*circ_times.timeout_ms)/1000+1) { + log_notice(LD_CIRC, + "Extremely large value for circuit build timeout: %ld. " + "Assuming clock jump.", now - victim->timestamp_created); + } else if (circuit_build_times_add_timeout(&circ_times, + first_hop_succeeded, + victim->timestamp_created)) { circuit_build_times_set_timeout(&circ_times); + } } if (victim->n_conn) -- cgit v1.2.3-54-g00ecf From 0a6191cf701617d51d30c468b5987b62ad75883c Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Sat, 8 May 2010 10:41:00 -0700 Subject: Bug 1357: Store the suspended timeout value to resume. This prevents a spurious warning where we have a timeout just after deciding our network came back online. --- src/or/circuitbuild.c | 7 ++++--- src/or/or.h | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 0840e304f1..3600ce7c4d 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -922,17 +922,18 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) "Temporarily raising timeout to %lds.", (long int)(now - cbt->liveness.network_last_live), tor_lround(circuit_build_times_get_initial_timeout()/1000)); + cbt->liveness.suspended_timeout = cbt->timeout_ms; cbt->timeout_ms = circuit_build_times_get_initial_timeout(); - cbt->liveness.net_suspended = 1; control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED); } return 0; - } else if (cbt->liveness.net_suspended) { + } else if (cbt->liveness.suspended_timeout) { log_notice(LD_CIRC, "Network activity has resumed. " "Resuming circuit timeout calculations."); - cbt->liveness.net_suspended = 0; + cbt->timeout_ms = cbt->liveness.suspended_timeout; + cbt->liveness.suspended_timeout = 0; control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME); } diff --git a/src/or/or.h b/src/or/or.h index c8886cff1e..f31847408e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3108,8 +3108,9 @@ typedef struct { int num_recent_circs; /** Index into circular array. */ int after_firsthop_idx; - /** The network is not live. Timeout gathering is suspended */ - int net_suspended; + /** Timeout gathering is suspended if non-zero. The old timeout value + * is stored here in that case. */ + build_time_t suspended_timeout; } network_liveness_t; /** Structure for circuit build times history */ -- cgit v1.2.3-54-g00ecf From 29e0d7081454c49cb6cab14685447aa811379a22 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Sat, 8 May 2010 11:54:29 -0700 Subject: Bug 1296: Add option+logic to disable CBT learning. There are now four ways that CBT can be disabled: 1. Network-wide, with the cbtdisabled consensus param. 2. Via config, with "LearnCircuitBuildTimeout 0" 3. Via config, with "AuthoritativeDirectory 1" 4. Via a state file write failure. --- doc/spec/path-spec.txt | 6 ++++++ doc/tor.1.txt | 9 ++++++++- src/or/circuitbuild.c | 38 +++++++++++++++++++++++++++++++++++--- src/or/config.c | 5 ++++- src/or/or.h | 9 ++++++--- 5 files changed, 59 insertions(+), 8 deletions(-) (limited to 'src/or') diff --git a/doc/spec/path-spec.txt b/doc/spec/path-spec.txt index ae5ab3e3dd..2beeedeace 100644 --- a/doc/spec/path-spec.txt +++ b/doc/spec/path-spec.txt @@ -382,6 +382,12 @@ of their choices. construction. If these parameters are not present in the consensus, the listed default values should be used instead. + cbtdisabled + Default: 0 + Effect: If non-zero, all CircuitBuildTime learning code should be + disabled and history should be discarded. For use in + emergency situations only. + cbtrecentcount Default: 20 Effect: This is the number of circuit build times to keep track of diff --git a/doc/tor.1.txt b/doc/tor.1.txt index 2ae5005d8c..e6bce02e3b 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -419,9 +419,16 @@ The following options are useful only for clients (that is, if fingerprint to look up the bridge descriptor at the bridge authority, if it's provided and if UpdateBridgesFromAuthority is set too. +**LearnCircuitBuildTimeout** **0**|**1**:: + If 0, CircuitBuildTimeout adaptive learning is disabled. (Default: 1) + **CircuitBuildTimeout** __NUM__:: + Try for at most NUM seconds when building circuits. If the circuit isn't - open in that time, give up on it. (Default: 1 minute.) + open in that time, give up on it. If LearnCircuitBuildTimeout is 1, this + value serves as the initial value to use before a timeout is learned. If + LearnCircuitBuildTimeout is 0, this value is the only value used. + (Default: 60 seconds.) **CircuitIdleTimeout** __NUM__:: If we have kept a clean (never used) circuit around for NUM seconds, then diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 3600ce7c4d..4f79d32444 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -81,6 +81,28 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); +static int +circuit_build_times_disabled(void) +{ + int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled", + 0); + int config_disabled = !get_options()->LearnCircuitBuildTimeout; + int dirauth_disabled = get_options()->AuthoritativeDir; + int state_disabled = (get_or_state()->LastWritten == -1); + + if (consensus_disabled || config_disabled || dirauth_disabled || + state_disabled) { + log_info(LD_CIRC, + "CircuitBuildTime learning is disabled. " + "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", + consensus_disabled, config_disabled, dirauth_disabled, + state_disabled); + return 1; + } else { + return 0; + } +} + static int32_t circuit_build_times_max_timeouts(void) { @@ -531,11 +553,16 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, uint32_t loaded_cnt = 0, N = 0; config_line_t *line; int i; - build_time_t *loaded_times = tor_malloc(sizeof(build_time_t) - * state->TotalBuildTimes); + build_time_t *loaded_times; circuit_build_times_init(cbt); *msg = NULL; + if (circuit_build_times_disabled()) { + return 0; + } + + loaded_times = tor_malloc(sizeof(build_time_t)*state->TotalBuildTimes); + for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t *args = smartlist_create(); smartlist_split_string(args, line->value, " ", @@ -1020,6 +1047,11 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { + if (circuit_build_times_disabled()) { + cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + return 0; + } + circuit_build_times_network_timeout(cbt, did_onehop, start_time); /* Only count timeouts if network is live.. */ @@ -1769,7 +1801,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) if (timediff < 0 || timediff > 2*circ_times.timeout_ms+1000) { log_notice(LD_CIRC, "Strange value for circuit build time: %ld. " "Assuming clock jump.", timediff); - } else { + } else if (!circuit_build_times_disabled()) { circuit_build_times_add_time(&circ_times, (build_time_t)timediff); circuit_build_times_network_circ_success(&circ_times); circuit_build_times_set_timeout(&circ_times); diff --git a/src/or/config.c b/src/or/config.c index 5d07cd7343..77ef45ec42 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -167,6 +167,7 @@ static config_var_t _option_vars[] = { V(BridgeRecordUsageByCountry, BOOL, "1"), V(BridgeRelay, BOOL, "0"), V(CellStatistics, BOOL, "0"), + V(LearnCircuitBuildTimeout, BOOL, "1"), V(CircuitBuildTimeout, INTERVAL, "0"), V(CircuitIdleTimeout, INTERVAL, "1 hour"), V(CircuitStreamTimeout, INTERVAL, "0"), @@ -4982,7 +4983,6 @@ or_state_save(time_t now) if (accounting_is_enabled(get_options())) accounting_run_housekeeping(now); - global_state->LastWritten = time(NULL); tor_free(global_state->TorVersion); tor_asprintf(&global_state->TorVersion, "Tor %s", get_version()); @@ -4997,10 +4997,13 @@ or_state_save(time_t now) fname = get_datadir_fname("state"); if (write_str_to_file(fname, contents, 0)<0) { log_warn(LD_FS, "Unable to write state to file \"%s\"", fname); + global_state->LastWritten = -1; tor_free(fname); tor_free(contents); return -1; } + + global_state->LastWritten = time(NULL); log_info(LD_GENERAL, "Saved state to \"%s\"", fname); tor_free(fname); tor_free(contents); diff --git a/src/or/or.h b/src/or/or.h index f31847408e..36c45e2db0 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2496,9 +2496,12 @@ typedef struct { * connections alive? */ int SocksTimeout; /**< How long do we let a socks connection wait * unattached before we fail it? */ - int CircuitBuildTimeout; /**< If non-zero, cull non-open circuits that - * were born at least this many seconds ago. If - * zero, use the internal adaptive algorithm. */ + int LearnCircuitBuildTimeout; /**< If non-zero, we attempt to learn a value + * for CircuitBuildTimeout based on timeout + * history */ + int CircuitBuildTimeout; /**< Cull non-open circuits that were born at + * least this many seconds ago. Used until + * adaptive algorithm learns a new value. */ int CircuitIdleTimeout; /**< Cull open clean circuits that were born * at least this many seconds ago. */ int CircuitStreamTimeout; /**< If non-zero, detach streams from circuits -- cgit v1.2.3-54-g00ecf From a5ac96b58d17ff14e63ae714f117c04d23c44215 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 10 May 2010 19:56:27 -0700 Subject: Fix comments from Sebastian + Nick's code review. Check for overflow in one place, and be consistent about type usage. --- src/or/circuitbuild.c | 11 ++++++++--- src/or/or.h | 5 +++-- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 4f79d32444..1d4d267b7c 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -955,7 +955,7 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) } return 0; - } else if (cbt->liveness.suspended_timeout) { + } else if (cbt->liveness.suspended_timeout > 0) { log_notice(LD_CIRC, "Network activity has resumed. " "Resuming circuit timeout calculations."); @@ -1004,7 +1004,12 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) /* Check to see if this has happened before. If so, double the timeout * to give people on abysmally bad network connections a shot at access */ if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) { - cbt->timeout_ms *= 2; + if (cbt->timeout_ms > INT32_MAX/2) { + log_warn(LD_CIRC, "Insanely large circuit build timeout value: %lf", + cbt->timeout_ms); + } else { + cbt->timeout_ms *= 2; + } } else { cbt->timeout_ms = circuit_build_times_get_initial_timeout(); } @@ -1100,7 +1105,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) } timeout_rate = circuit_build_times_timeout_rate(cbt); - max_timeout = tor_lround(circuit_build_times_calculate_timeout(cbt, + max_timeout = (build_time_t)tor_lround(circuit_build_times_calculate_timeout(cbt, circuit_build_times_max_synthetic_quantile())); for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { diff --git a/src/or/or.h b/src/or/or.h index 36c45e2db0..7b1982ae2e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3113,7 +3113,7 @@ typedef struct { int after_firsthop_idx; /** Timeout gathering is suspended if non-zero. The old timeout value * is stored here in that case. */ - build_time_t suspended_timeout; + double suspended_timeout; } network_liveness_t; /** Structure for circuit build times history */ @@ -3137,7 +3137,8 @@ typedef struct { double alpha; /** Have we computed a timeout? */ int have_computed_timeout; - /** The exact value for that timeout in milliseconds */ + /** The exact value for that timeout in milliseconds. Stored as a double + * to maintain precision from calculations to and from quantile value. */ double timeout_ms; } circuit_build_times_t; -- cgit v1.2.3-54-g00ecf From d9be6f38450fd4c20f5c3711e68676547997b23c Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 12 May 2010 15:31:22 -0700 Subject: Fix CBT unit tests. --- src/or/circuitbuild.c | 40 +++++++++++++++++++++++++--------------- src/test/test.c | 4 ++-- 2 files changed, 27 insertions(+), 17 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 1d4d267b7c..97e7c1c0f9 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -84,22 +84,26 @@ static void entry_guards_changed(void); static int circuit_build_times_disabled(void) { - int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled", - 0); - int config_disabled = !get_options()->LearnCircuitBuildTimeout; - int dirauth_disabled = get_options()->AuthoritativeDir; - int state_disabled = (get_or_state()->LastWritten == -1); - - if (consensus_disabled || config_disabled || dirauth_disabled || - state_disabled) { - log_info(LD_CIRC, - "CircuitBuildTime learning is disabled. " - "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", - consensus_disabled, config_disabled, dirauth_disabled, - state_disabled); - return 1; - } else { + if (unit_tests) { return 0; + } else { + int consensus_disabled = networkstatus_get_param(NULL, "cbtdisabled", + 0); + int config_disabled = !get_options()->LearnCircuitBuildTimeout; + int dirauth_disabled = get_options()->AuthoritativeDir; + int state_disabled = (get_or_state()->LastWritten == -1); + + if (consensus_disabled || config_disabled || dirauth_disabled || + state_disabled) { + log_info(LD_CIRC, + "CircuitBuildTime learning is disabled. " + "Consensus=%d, Config=%d, AuthDir=%d, StateFile=%d", + consensus_disabled, config_disabled, dirauth_disabled, + state_disabled); + return 1; + } else { + return 0; + } } } @@ -1104,6 +1108,12 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) return 0; } + /* Do not filter for unit tests. They expect the distribution to be + * unchanged from run to run. */ + if (unit_tests) { + return 0; + } + timeout_rate = circuit_build_times_timeout_rate(cbt); max_timeout = (build_time_t)tor_lround(circuit_build_times_calculate_timeout(cbt, circuit_build_times_max_synthetic_quantile())); diff --git a/src/test/test.c b/src/test/test.c index 8d596981ef..eb06c762df 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -494,7 +494,7 @@ test_circuit_timeout(void) timeout1 = circuit_build_times_calculate_timeout(&estimate, CBT_DEFAULT_QUANTILE_CUTOFF/100.0); circuit_build_times_set_timeout(&estimate); - log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm); + log_warn(LD_CIRC, "Timeout1 is %lf, Xm is %d", timeout1, estimate.Xm); /* XXX: 5% distribution error may not be the right metric */ } while (fabs(circuit_build_times_cdf(&initial, timeout0) - circuit_build_times_cdf(&initial, timeout1)) > 0.05 @@ -511,7 +511,7 @@ test_circuit_timeout(void) CBT_DEFAULT_QUANTILE_CUTOFF/100.0); circuit_build_times_set_timeout(&final); - log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm); + log_warn(LD_CIRC, "Timeout2 is %lf, Xm is %d", timeout2, final.Xm); test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) - circuit_build_times_cdf(&initial, timeout2)) < 0.05); -- cgit v1.2.3-54-g00ecf From d76ebb79aa2e96664a028dda59f820bf80629128 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Jun 2010 22:07:54 -0700 Subject: Improve log message about large timeouts and fix some typos. --- src/or/circuitbuild.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 97e7c1c0f9..e5a9717659 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -335,7 +335,8 @@ int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) { if (time <= 0 || time > CBT_BUILD_TIME_MAX) { - log_warn(LD_CIRC, "Circuit build time is %u!", time); + log_warn(LD_BUG, "Circuit build time is too large (%u)." + "This is probably a bug.", time); tor_fragile_assert(); return -1; } @@ -1192,7 +1193,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) if (prev_timeout > tor_lround(cbt->timeout_ms/1000)) { log_notice(LD_CIRC, - "Based on %d cicuit times, it looks like we don't need to " + "Based on %d circuit times, it looks like we don't need to " "wait so long for circuits to finish. We will now assume a " "circuit is too slow to use after waiting %ld seconds.", cbt->total_build_times, @@ -1202,8 +1203,8 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate); } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) { log_notice(LD_CIRC, - "Based on %d cicuit times, it looks like we need to wait " - "wait longer for circuits to finish. We will now assume a " + "Based on %d circuit times, it looks like we need to wait " + "longer for circuits to finish. We will now assume a " "circuit is too slow to use after waiting %ld seconds.", cbt->total_build_times, tor_lround(cbt->timeout_ms/1000)); -- cgit v1.2.3-54-g00ecf From dc880924b7d861b551471ac543bd367ddab81070 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Jun 2010 22:10:22 -0700 Subject: Arma hates (\d+). Remove mine and some others. --- src/or/or.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/or') diff --git a/src/or/or.h b/src/or/or.h index 7b1982ae2e..2d40b8233a 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -158,7 +158,7 @@ #define MAX_DNS_TTL (3*60*60) /** How small can a TTL be before we stop believing it? Provides rudimentary * pinning. */ -#define MIN_DNS_TTL (60) +#define MIN_DNS_TTL 60 /** How often do we rotate onion keys? */ #define MIN_ONION_KEY_LIFETIME (7*24*60*60) @@ -3027,7 +3027,7 @@ void entry_guards_free_all(void); #define CBT_BIN_WIDTH ((build_time_t)50) /** Number of modes to use in the weighted-avg computation of Xm */ -#define CBT_NUM_XM_MODES (3) +#define CBT_NUM_XM_MODES 3 /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; @@ -3046,7 +3046,7 @@ typedef uint32_t build_time_t; * at which point we switch back to computing the timeout from * our saved history. */ -#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT (3) +#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3 /** * This tells us when to toss out the last streak of N timeouts. @@ -3838,7 +3838,7 @@ download_status_mark_impossible(download_status_t *dl) /********************************* dirserv.c ***************************/ /** Maximum length of an exit policy summary. */ -#define MAX_EXITPOLICY_SUMMARY_LEN (1000) +#define MAX_EXITPOLICY_SUMMARY_LEN 1000 /** Maximum allowable length of a version line in a networkstatus. */ #define MAX_V_LINE_LEN 128 @@ -4394,7 +4394,7 @@ int nt_service_parse_options(int argc, char **argv, int *should_exit); int nt_service_is_stopping(void); void nt_service_set_state(DWORD state); #else -#define nt_service_is_stopping() (0) +#define nt_service_is_stopping() 0 #endif /********************************* onion.c ***************************/ -- cgit v1.2.3-54-g00ecf From 848d9f8b43e607bca448cad9c6dcf985d0692533 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 3 Jun 2010 02:36:43 -0700 Subject: Remove synthetic timeout code in favor of better Pareto model. --- src/or/circuitbuild.c | 128 ++++++++++---------------------------------------- src/or/or.h | 9 +--- 2 files changed, 25 insertions(+), 112 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index e5a9717659..7ee05d3471 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -131,14 +131,6 @@ circuit_build_times_quantile_cutoff(void) return num/100.0; } -static double -circuit_build_times_max_synthetic_quantile(void) -{ - int32_t num = networkstatus_get_param(NULL, "cbtmaxsynthquantile", - CBT_DEFAULT_MAX_SYNTHETIC_QUANTILE); - return num/100.0; -} - static int32_t circuit_build_times_test_frequency(void) { @@ -258,7 +250,6 @@ void circuit_build_times_reset(circuit_build_times_t *cbt) { memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times)); - cbt->pre_timeouts = 0; cbt->total_build_times = 0; cbt->build_times_idx = 0; cbt->have_computed_timeout = 0; @@ -289,22 +280,6 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) { int i = 0; - if (cbt->pre_timeouts) { - /* If we have pre-timeouts, it means we're not yet storing - * timeouts in our normal array. Only rewind the counter. */ - if (cbt->pre_timeouts > n) { - cbt->pre_timeouts -= n; - } else { - cbt->pre_timeouts = 0; - } - log_info(LD_CIRC, - "Rewound history by %d places. Current index: %d. Total: %d. " - "Pre-timeouts: %d", n, cbt->build_times_idx, - cbt->total_build_times, cbt->pre_timeouts); - - return; - } - cbt->build_times_idx -= n; cbt->build_times_idx %= CBT_NCIRCUITS_TO_OBSERVE; @@ -366,7 +341,8 @@ circuit_build_times_max(circuit_build_times_t *cbt) int i = 0; build_time_t max_build_time = 0; for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] > max_build_time) + if (cbt->circuit_build_times[i] > max_build_time + && cbt->circuit_build_times[i] != CBT_BUILD_TIMEOUT) max_build_time = cbt->circuit_build_times[i]; } return max_build_time; @@ -413,7 +389,9 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, // calculate histogram for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ + if (cbt->circuit_build_times[i] == 0 + || cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT) + continue; /* 0 <-> uninitialized */ c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH); histogram[c]++; @@ -660,13 +638,16 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) { build_time_t *x=cbt->circuit_build_times; double a = 0; - int n=0,i=0; + int n=0,i=0,timeout_count=0; + build_time_t max_time=0; /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ /* We sort of cheat here and make our samples slightly more pareto-like * and less frechet-like. */ cbt->Xm = circuit_build_times_get_xm(cbt); + tor_assert(cbt->Xm > 0); + for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { if (!x[i]) { continue; @@ -674,8 +655,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) if (x[i] < cbt->Xm) { a += tor_mathlog(cbt->Xm); + } else if (x[i] == CBT_BUILD_TIMEOUT) { + timeout_count++; } else { a += tor_mathlog(x[i]); + if (x[i] > max_time) + max_time = x[i]; } n++; } @@ -686,8 +671,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) } tor_assert(n==cbt->total_build_times); + tor_assert(max_time > 0); + + a += timeout_count*tor_mathlog(max_time); + a -= n*tor_mathlog(cbt->Xm); - a = n/a; + a = (n-timeout_count)/a; cbt->alpha = a; } @@ -767,32 +756,6 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, return ret; } -/** Generate points in [cutoff, 1.0) on the CDF. */ -void -circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, - double quantile_cutoff) -{ - build_time_t gentime = circuit_build_times_generate_sample(cbt, - quantile_cutoff, circuit_build_times_max_synthetic_quantile()); - - if (gentime < (build_time_t)tor_lround(cbt->timeout_ms)) { - log_warn(LD_CIRC, - "Generated a synthetic timeout LESS than the current timeout: " - "%ums vs %lfms using Xm: %d a: %lf, q: %lf", - gentime, cbt->timeout_ms, cbt->Xm, cbt->alpha, quantile_cutoff); - } else if (gentime > CBT_BUILD_TIME_MAX) { - log_info(LD_CIRC, - "Generated a synthetic timeout larger than the max: %u", - gentime); - gentime = CBT_BUILD_TIME_MAX; - } else { - log_info(LD_CIRC, "Generated synthetic circuit build time %u for timeout", - gentime); - } - - circuit_build_times_add_time(cbt, gentime); -} - /** * Estimate an initial alpha parameter by solving the quantile * function with a quantile point and a specific timeout value. @@ -814,33 +777,6 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, tor_assert(cbt->alpha > 0); } -/** - * Generate synthetic timeout values for the timeouts - * that have happened before we estimated our parameters. - */ -static void -circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) -{ - /* Store a timeout as a random position past the current - * cutoff on the pareto curve */ - if (cbt->pre_timeouts) { - double timeout_quantile = 1.0- - ((double)cbt->pre_timeouts)/ - (cbt->pre_timeouts+cbt->total_build_times); - /* Make sure it doesn't exceed the synthetic max */ - timeout_quantile *= circuit_build_times_max_synthetic_quantile(); - cbt->Xm = circuit_build_times_get_xm(cbt); - tor_assert(cbt->Xm > 0); - /* Use current timeout to get an estimate on alpha */ - circuit_build_times_initial_alpha(cbt, timeout_quantile, - cbt->timeout_ms); - while (cbt->pre_timeouts-- != 0) { - circuit_build_times_add_timeout_worker(cbt, timeout_quantile); - } - cbt->pre_timeouts = 0; - } -} - /** * Returns true if we need circuits to be built */ @@ -1075,20 +1011,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, return 0; } - if (!cbt->have_computed_timeout) { - /* Store a timeout before we have enough data */ - cbt->pre_timeouts++; - log_info(LD_CIRC, - "Not enough circuits yet to calculate a new build timeout." - " Need %d more.", circuit_build_times_min_circs_to_observe() - - cbt->total_build_times); - return 0; - } - - circuit_build_times_count_pretimeouts(cbt); - circuit_build_times_add_timeout_worker(cbt, - circuit_build_times_quantile_cutoff()); - + circuit_build_times_add_time(cbt, CBT_BUILD_TIMEOUT); return 1; } @@ -1116,18 +1039,16 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) } timeout_rate = circuit_build_times_timeout_rate(cbt); - max_timeout = (build_time_t)tor_lround(circuit_build_times_calculate_timeout(cbt, - circuit_build_times_max_synthetic_quantile())); + max_timeout = /*(build_time_t)tor_lround(circuit_build_times_calculate_timeout(cbt, + circuit_build_times_max_synthetic_quantile()));*/ + (build_time_t)cbt->timeout_ms; + /* XXX: Make a special bin for timeout values */ for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] > max_timeout) { build_time_t replaced = cbt->circuit_build_times[i]; num_filtered++; - cbt->circuit_build_times[i] = - MIN(circuit_build_times_generate_sample(cbt, - circuit_build_times_quantile_cutoff(), - circuit_build_times_max_synthetic_quantile()), - CBT_BUILD_TIME_MAX); + cbt->circuit_build_times[i] = CBT_BUILD_TIMEOUT; log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, cbt->circuit_build_times[i]); @@ -1154,7 +1075,6 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) return 0; } - circuit_build_times_count_pretimeouts(cbt); circuit_build_times_update_alpha(cbt); cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt, diff --git a/src/or/or.h b/src/or/or.h index 2d40b8233a..d1b3661789 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3018,11 +3018,6 @@ void entry_guards_free_all(void); * 1000 is approx 2.5 days worth of continual-use circuits. */ #define CBT_NCIRCUITS_TO_OBSERVE 1000 -/** Maximum quantile to use to generate synthetic timeouts. - * We want to stay a bit short of 100, because longtail is - * loooooooooooooooooooooooooooooooooooooooooooooooooooong. */ -#define CBT_DEFAULT_MAX_SYNTHETIC_QUANTILE 90 - /** Width of the histogram bins in milliseconds */ #define CBT_BIN_WIDTH ((build_time_t)50) @@ -3031,6 +3026,7 @@ void entry_guards_free_all(void); /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; +#define CBT_BUILD_TIMEOUT ((build_time_t)(INT32_MAX-1)) #define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) /** Save state every 10 circuits */ @@ -3128,9 +3124,6 @@ typedef struct { network_liveness_t liveness; /** Last time we built a circuit. Used to decide to build new test circs */ time_t last_circ_at; - /** Number of timeouts that have happened before estimating pareto - * parameters */ - int pre_timeouts; /** "Minimum" value of our pareto distribution (actually mode) */ build_time_t Xm; /** alpha exponent for pareto dist. */ -- cgit v1.2.3-54-g00ecf From 38770dd6a580e1620f257042b73ea563fc11e862 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 7 Jun 2010 02:36:43 -0700 Subject: Add timeout count state variable. --- src/or/circuitbuild.c | 117 ++++++++++++++++++++++++++------------------------ src/or/config.c | 1 + src/or/or.h | 3 +- 3 files changed, 63 insertions(+), 58 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 7ee05d3471..f16c8b6bd9 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -470,6 +470,12 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, *next = NULL; state->TotalBuildTimes = cbt->total_build_times; + state->CircuitBuildTimeoutCount = 0; + + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT) + state->CircuitBuildTimeoutCount++; + } for (i = 0; i < nbins; i++) { // compress the histogram by skipping the blanks @@ -521,6 +527,40 @@ circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt, } } +/** + * Filter old synthetic timeouts that were created before the + * right censored new Pareto calculation was deployed. + */ +static int +circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) +{ + int num_filtered=0, i=0; + double timeout_rate = 0; + build_time_t max_timeout = 0; + + timeout_rate = circuit_build_times_timeout_rate(cbt); + max_timeout = (build_time_t)cbt->timeout_ms; + + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] > max_timeout) { + build_time_t replaced = cbt->circuit_build_times[i]; + num_filtered++; + cbt->circuit_build_times[i] = CBT_BUILD_TIMEOUT; + + log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, + cbt->circuit_build_times[i]); + } + } + + log_info(LD_CIRC, + "We had %d timeouts out of %d build times, " + "and filtered %d above the max of %u", + (int)(cbt->total_build_times*timeout_rate), + cbt->total_build_times, num_filtered, max_timeout); + + return num_filtered; +} + /** * Load histogram from state, shuffling the resulting array * after we do so. Use this result to estimate parameters and @@ -535,7 +575,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0; uint32_t loaded_cnt = 0, N = 0; config_line_t *line; - int i; + unsigned int i; build_time_t *loaded_times; circuit_build_times_init(cbt); *msg = NULL; @@ -581,7 +621,8 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, break; } - if (loaded_cnt+count > state->TotalBuildTimes) { + if (loaded_cnt+count+state->CircuitBuildTimeoutCount + > state->TotalBuildTimes) { log_warn(LD_CIRC, "Too many build times in state file. " "Stopping short before %d", @@ -600,10 +641,17 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, } } + log_info(LD_CIRC, + "Adding %d timeouts.", state->CircuitBuildTimeoutCount); + for (i=0; i < state->CircuitBuildTimeoutCount; i++) { + loaded_times[loaded_cnt++] = CBT_BUILD_TIMEOUT; + } + if (loaded_cnt != state->TotalBuildTimes) { log_warn(LD_CIRC, "Corrupt state file? Build times count mismatch. " - "Read %d, file says %d", loaded_cnt, state->TotalBuildTimes); + "Read %d times, but file says %d", loaded_cnt, + state->TotalBuildTimes); } circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt); @@ -619,7 +667,13 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, tot_values, cbt->total_build_times, N); tor_assert(cbt->total_build_times == tot_values); tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE); + circuit_build_times_set_timeout(cbt); + + if (!state->CircuitBuildTimeoutCount) { + circuit_build_times_filter_timeouts(cbt); + } + tor_free(loaded_times); return *msg ? -1 : 0; } @@ -676,6 +730,9 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) a += timeout_count*tor_mathlog(max_time); a -= n*tor_mathlog(cbt->Xm); + // Estimator comes from Eq #4 in: + // "Bayesian estimation based on trimmed samples from Pareto populations" + // by Arturo J. Fernández. We are right-censored only. a = (n-timeout_count)/a; cbt->alpha = a; @@ -1015,55 +1072,6 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, return 1; } -static int -circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) -{ - int num_filtered=0, i=0; - double timeout_rate = 0; - build_time_t max_timeout = 0; - - /* If we replace high timeouts when the curve is really steep - * (alpha is above ~1.5), we can end up with a lot of timeouts clustered - * really close to the current timeout value, which makes the next - * alpha even higher, which makes us filter out more, and this becomes - * progressively worse until the timeout *is* Xm. Filtering should - * only be done on really shallow curves anyway. */ - if (cbt->alpha > 1.5) { - return 0; - } - - /* Do not filter for unit tests. They expect the distribution to be - * unchanged from run to run. */ - if (unit_tests) { - return 0; - } - - timeout_rate = circuit_build_times_timeout_rate(cbt); - max_timeout = /*(build_time_t)tor_lround(circuit_build_times_calculate_timeout(cbt, - circuit_build_times_max_synthetic_quantile()));*/ - (build_time_t)cbt->timeout_ms; - - /* XXX: Make a special bin for timeout values */ - for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] > max_timeout) { - build_time_t replaced = cbt->circuit_build_times[i]; - num_filtered++; - cbt->circuit_build_times[i] = CBT_BUILD_TIMEOUT; - - log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, - cbt->circuit_build_times[i]); - } - } - - log_info(LD_CIRC, - "We had %d timeouts out of %d build times, " - "and filtered %d above the max of %u", - (int)(cbt->total_build_times*timeout_rate), - cbt->total_build_times, num_filtered, max_timeout); - - return num_filtered; -} - /** * Estimate a new timeout based on history and set our timeout * variable accordingly. @@ -1091,16 +1099,11 @@ void circuit_build_times_set_timeout(circuit_build_times_t *cbt) { long prev_timeout = tor_lround(cbt->timeout_ms/1000); - int filtered=0; double timeout_rate; if (!circuit_build_times_set_timeout_worker(cbt)) return; - if ((filtered = circuit_build_times_filter_timeouts(cbt)) > 0) { - circuit_build_times_set_timeout_worker(cbt); - } - if (cbt->timeout_ms < circuit_build_times_min_timeout()) { log_warn(LD_CIRC, "Set buildtimeout to low value %lfms. Setting to %dms", cbt->timeout_ms, circuit_build_times_min_timeout()); diff --git a/src/or/config.c b/src/or/config.c index 77ef45ec42..c3b4fc933e 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -421,6 +421,7 @@ static config_var_t _state_vars[] = { V(LastWritten, ISOTIME, NULL), V(TotalBuildTimes, UINT, NULL), + V(CircuitBuildTimeoutCount, UINT, "0"), VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), diff --git a/src/or/or.h b/src/or/or.h index d1b3661789..49abf552b6 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2841,7 +2841,8 @@ typedef struct { /** Build time histogram */ config_line_t * BuildtimeHistogram; - uint16_t TotalBuildTimes; + unsigned int TotalBuildTimes; + unsigned int CircuitBuildTimeoutCount; /** What version of Tor wrote this state file? */ char *TorVersion; -- cgit v1.2.3-54-g00ecf From f897154b26ffddf42f8c9b1988dbcb4355222c37 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 7 Jun 2010 21:17:12 -0700 Subject: Make the Xm mode selection a consensus parameter. --- src/or/circuitbuild.c | 16 +++++++++++++--- src/or/or.h | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index f16c8b6bd9..7dc544d47d 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -115,6 +115,14 @@ circuit_build_times_max_timeouts(void) return num; } +static int32_t +circuit_build_times_default_num_xm_modes(void) +{ + int32_t num = networkstatus_get_param(NULL, "cbtnummodes", + CBT_DEFAULT_NUM_XM_MODES); + return num; +} + static int32_t circuit_build_times_min_circs_to_observe(void) { @@ -410,19 +418,20 @@ static build_time_t circuit_build_times_get_xm(circuit_build_times_t *cbt) { build_time_t i, nbins; - build_time_t nth_max_bin[CBT_NUM_XM_MODES]; + build_time_t *nth_max_bin; int32_t bin_counts=0; build_time_t ret = 0; uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); int n=0; - int num_modes = CBT_NUM_XM_MODES; + int num_modes = circuit_build_times_default_num_xm_modes(); // Only use one mode if < 1000 buildtimes. Not enough data // for multiple. if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE) num_modes = 1; - memset(nth_max_bin, 0, sizeof(nth_max_bin)); + nth_max_bin = (build_time_t*)tor_malloc_zero(num_modes*sizeof(build_time_t)); + for (i = 0; i < nbins; i++) { if (histogram[i] >= histogram[nth_max_bin[0]]) { nth_max_bin[0] = i; @@ -446,6 +455,7 @@ circuit_build_times_get_xm(circuit_build_times_t *cbt) ret /= bin_counts; tor_free(histogram); + tor_free(nth_max_bin); return ret; } diff --git a/src/or/or.h b/src/or/or.h index 49abf552b6..e99b2e91dd 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3023,7 +3023,7 @@ void entry_guards_free_all(void); #define CBT_BIN_WIDTH ((build_time_t)50) /** Number of modes to use in the weighted-avg computation of Xm */ -#define CBT_NUM_XM_MODES 3 +#define CBT_DEFAULT_NUM_XM_MODES 3 /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; -- cgit v1.2.3-54-g00ecf From c96206090e5090433a7dbb4f5d52ad2efeb00c37 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 8 Jun 2010 01:01:47 -0700 Subject: Keep circuits open until the greater of 95th CDF percentile or 60s. This is done to provide better data to our right-censored Pareto model. We do this by simply marking them with a new purpose. --- src/or/circuitbuild.c | 43 +++++++++++++++++++++++++++++++++---------- src/or/circuituse.c | 23 ++++++++++++++++++++++- src/or/or.h | 28 ++++++++++++++++++---------- 3 files changed, 73 insertions(+), 21 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 7dc544d47d..90b4f523c7 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -139,6 +139,15 @@ circuit_build_times_quantile_cutoff(void) return num/100.0; } +static double +circuit_build_times_close_quantile(void) +{ + int32_t num = networkstatus_get_param(NULL, "cbtclosequantile", + CBT_DEFAULT_CLOSE_QUANTILE); + + return num/100.0; +} + static int32_t circuit_build_times_test_frequency(void) { @@ -539,7 +548,10 @@ circuit_build_times_shuffle_and_store_array(circuit_build_times_t *cbt, /** * Filter old synthetic timeouts that were created before the - * right censored new Pareto calculation was deployed. + * new right-censored Pareto calculation was deployed. + * + * Once all clients before 0.2.1.13-alpha are gone, this code + * will be unused. */ static int circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) @@ -549,7 +561,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) build_time_t max_timeout = 0; timeout_rate = circuit_build_times_timeout_rate(cbt); - max_timeout = (build_time_t)cbt->timeout_ms; + max_timeout = (build_time_t)cbt->close_ms; for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] > max_timeout) { @@ -1097,6 +1109,15 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt, circuit_build_times_quantile_cutoff()); + + cbt->close_ms = circuit_build_times_calculate_timeout(cbt, + circuit_build_times_close_quantile()); + + /* Sometimes really fast guard nodes give us such a steep curve + * that this ends up being not that much greater than timeout_ms. + * Make it be at least 1 min to handle this case. */ + cbt->close_ms = MAX(cbt->close_ms, 60*1000); + cbt->have_computed_timeout = 1; return 1; } @@ -1132,8 +1153,9 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->total_build_times, tor_lround(cbt->timeout_ms/1000)); log_info(LD_CIRC, - "Circuit timeout data: %lfms, Xm: %d, a: %lf, r: %lf", - cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate); + "Circuit timeout data: %lfms, %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, + timeout_rate); } else if (prev_timeout < tor_lround(cbt->timeout_ms/1000)) { log_notice(LD_CIRC, "Based on %d circuit times, it looks like we need to wait " @@ -1142,14 +1164,15 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->total_build_times, tor_lround(cbt->timeout_ms/1000)); log_info(LD_CIRC, - "Circuit timeout data: %lfms, Xm: %d, a: %lf, r: %lf", - cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate); + "Circuit timeout data: %lfms, %lfms, Xm: %d, a: %lf, r: %lf", + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, + timeout_rate); } else { log_info(LD_CIRC, - "Set circuit build timeout to %lds (%lfms, Xm: %d, a: %lf, " - "r: %lf) based on %d circuit times", + "Set circuit build timeout to %lds (%lfms, %lfms, Xm: %d, a: %lf," + " r: %lf) based on %d circuit times", tor_lround(cbt->timeout_ms/1000), - cbt->timeout_ms, cbt->Xm, cbt->alpha, timeout_rate, + cbt->timeout_ms, cbt->close_ms, cbt->Xm, cbt->alpha, timeout_rate, cbt->total_build_times); } } @@ -1747,7 +1770,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) * it off at, we probably had a suspend event along this codepath, * and we should discard the value. */ - if (timediff < 0 || timediff > 2*circ_times.timeout_ms+1000) { + if (timediff < 0 || timediff > 2*circ_times.close_ms+1000) { log_notice(LD_CIRC, "Strange value for circuit build time: %ld. " "Assuming clock jump.", timediff); } else if (!circuit_build_times_disabled()) { diff --git a/src/or/circuituse.c b/src/or/circuituse.c index e7d10a22f2..5696d9c6f0 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -270,6 +270,7 @@ circuit_expire_building(time_t now) * decided on a customized one yet */ time_t general_cutoff = now - lround(circ_times.timeout_ms/1000); time_t begindir_cutoff = now - lround(circ_times.timeout_ms/2000); + time_t close_cutoff = now - lround(circ_times.close_ms/1000); time_t introcirc_cutoff = begindir_cutoff; cpath_build_state_t *build_state; @@ -286,8 +287,11 @@ circuit_expire_building(time_t now) cutoff = begindir_cutoff; else if (victim->purpose == CIRCUIT_PURPOSE_C_INTRODUCING) cutoff = introcirc_cutoff; + else if (victim->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) + cutoff = close_cutoff; else cutoff = general_cutoff; + if (victim->timestamp_created > cutoff) continue; /* it's still young, leave it alone */ @@ -350,12 +354,29 @@ circuit_expire_building(time_t now) } else { /* circuit not open, consider recording failure as timeout */ int first_hop_succeeded = TO_ORIGIN_CIRCUIT(victim)->cpath && TO_ORIGIN_CIRCUIT(victim)->cpath->state == CPATH_STATE_OPEN; + + if (TO_ORIGIN_CIRCUIT(victim)->p_streams != NULL) { + log_warn(LD_BUG, "Circuit %d (purpose %d) has timed out, " + "yet has attached streams!", + TO_ORIGIN_CIRCUIT(victim)->global_identifier, + victim->purpose); + tor_fragile_assert(); + continue; + } + + /* circuits are allowed to last longer for measurement. + * Switch their purpose and wait. */ + if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + victim->purpose = CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT; + continue; + } + /* * If the circuit build time is much greater than we would have cut * it off at, we probably had a suspend event along this codepath, * and we should discard the value. */ - if (now - victim->timestamp_created > (2*circ_times.timeout_ms)/1000+1) { + if (now - victim->timestamp_created > 2*circ_times.close_ms/1000+1) { log_notice(LD_CIRC, "Extremely large value for circuit build timeout: %ld. " "Assuming clock jump.", now - victim->timestamp_created); diff --git a/src/or/or.h b/src/or/or.h index e99b2e91dd..8e8ea18694 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -467,23 +467,23 @@ typedef enum { #define CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED 11 /** Client-side circuit purpose: at Alice, rendezvous established. */ #define CIRCUIT_PURPOSE_C_REND_JOINED 12 - -#define _CIRCUIT_PURPOSE_C_MAX 12 - +/** This circuit is used for build time measurement only */ +#define CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT 13 +#define _CIRCUIT_PURPOSE_C_MAX 13 /** Hidden-service-side circuit purpose: at Bob, waiting for introductions. */ -#define CIRCUIT_PURPOSE_S_ESTABLISH_INTRO 13 +#define CIRCUIT_PURPOSE_S_ESTABLISH_INTRO 14 /** Hidden-service-side circuit purpose: at Bob, successfully established * intro. */ -#define CIRCUIT_PURPOSE_S_INTRO 14 +#define CIRCUIT_PURPOSE_S_INTRO 15 /** Hidden-service-side circuit purpose: at Bob, connecting to rend point. */ -#define CIRCUIT_PURPOSE_S_CONNECT_REND 15 +#define CIRCUIT_PURPOSE_S_CONNECT_REND 16 /** Hidden-service-side circuit purpose: at Bob, rendezvous established. */ -#define CIRCUIT_PURPOSE_S_REND_JOINED 16 +#define CIRCUIT_PURPOSE_S_REND_JOINED 17 /** A testing circuit; not meant to be used for actual traffic. */ -#define CIRCUIT_PURPOSE_TESTING 17 +#define CIRCUIT_PURPOSE_TESTING 18 /** A controller made this circuit and Tor should not use it. */ -#define CIRCUIT_PURPOSE_CONTROLLER 18 -#define _CIRCUIT_PURPOSE_MAX 18 +#define CIRCUIT_PURPOSE_CONTROLLER 19 +#define _CIRCUIT_PURPOSE_MAX 19 /** A catch-all for unrecognized purposes. Currently we don't expect * to make or see any circuits with this purpose. */ #define CIRCUIT_PURPOSE_UNKNOWN 255 @@ -3055,6 +3055,12 @@ typedef uint32_t build_time_t; /* Circuit build times consensus parameters */ +/** + * How long to wait before actually closing circuits that take too long to + * build in terms of CDF quantile. + */ +#define CBT_DEFAULT_CLOSE_QUANTILE 95 + /** * How many circuits count as recent when considering if the * connection has gone gimpy or changed. @@ -3134,6 +3140,8 @@ typedef struct { /** The exact value for that timeout in milliseconds. Stored as a double * to maintain precision from calculations to and from quantile value. */ double timeout_ms; + /** How long we wait before actually closing the circuit. */ + double close_ms; } circuit_build_times_t; extern circuit_build_times_t circ_times; -- cgit v1.2.3-54-g00ecf From f528a6e62b75538a39b113f67e38fb947c80739e Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 14 Jun 2010 22:47:45 -0700 Subject: Fix initialization and reset issues with close_ms. Also clean up some log messages. --- src/or/circuitbuild.c | 56 +++++++++++++++++++++++++++++++++++++++------------ src/or/circuituse.c | 5 ++++- src/or/or.h | 3 +++ 3 files changed, 50 insertions(+), 14 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 90b4f523c7..b1335cc24c 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -285,7 +285,7 @@ circuit_build_times_init(circuit_build_times_t *cbt) cbt->liveness.num_recent_circs = circuit_build_times_recent_circuit_count(); cbt->liveness.timeouts_after_firsthop = tor_malloc_zero(sizeof(int8_t)* cbt->liveness.num_recent_circs); - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->close_ms = cbt->timeout_ms = circuit_build_times_get_initial_timeout(); control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET); } @@ -606,7 +606,8 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, return 0; } - loaded_times = tor_malloc(sizeof(build_time_t)*state->TotalBuildTimes); + /* build_time_t 0 means uninitialized */ + loaded_times = tor_malloc_zero(sizeof(build_time_t)*state->TotalBuildTimes); for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t *args = smartlist_create(); @@ -692,7 +693,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, circuit_build_times_set_timeout(cbt); - if (!state->CircuitBuildTimeoutCount) { + if (!state->CircuitBuildTimeoutCount && cbt->total_build_times) { circuit_build_times_filter_timeouts(cbt); } @@ -924,10 +925,20 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt, * entire existence during a time where we have had no network activity. * * Also double check that it is a valid timeout after we have possibly - * just recently reset cbt->timeout_ms. + * just recently reset cbt->close_ms. + * + * We use close_ms here because timeouts aren't actually counted as timeouts + * until close_ms elapses. */ if (cbt->liveness.network_last_live <= start_time && - start_time <= (now - cbt->timeout_ms/1000.0)) { + start_time <= (now - cbt->close_ms/1000.0)) { + if (did_onehop) { + log_warn(LD_BUG, + "Circuit somehow completed a hop while the network was " + "not live. Network was last live at %ld, but circuit launched " + "at %ld. It's now %ld.", cbt->liveness.network_last_live, + start_time, now); + } cbt->liveness.nonlive_timeouts++; } else if (did_onehop) { /* Count a one-hop timeout */ @@ -970,7 +981,9 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) (long int)(now - cbt->liveness.network_last_live), tor_lround(circuit_build_times_get_initial_timeout()/1000)); cbt->liveness.suspended_timeout = cbt->timeout_ms; - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->liveness.suspended_close_timeout = cbt->close_ms; + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED); } @@ -980,7 +993,9 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) "Network activity has resumed. " "Resuming circuit timeout calculations."); cbt->timeout_ms = cbt->liveness.suspended_timeout; + cbt->close_ms = cbt->liveness.suspended_close_timeout; cbt->liveness.suspended_timeout = 0; + cbt->liveness.suspended_close_timeout = 0; control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME); } @@ -995,6 +1010,8 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) * Also resets the entire timeout history in this case and causes us * to restart the process of building test circuits and estimating a * new timeout. + * + * XXX: All this may have been broken by the close_ms change! */ int circuit_build_times_network_check_changed(circuit_build_times_t *cbt) @@ -1024,14 +1041,17 @@ circuit_build_times_network_check_changed(circuit_build_times_t *cbt) /* Check to see if this has happened before. If so, double the timeout * to give people on abysmally bad network connections a shot at access */ if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) { - if (cbt->timeout_ms > INT32_MAX/2) { - log_warn(LD_CIRC, "Insanely large circuit build timeout value: %lf", - cbt->timeout_ms); + if (cbt->timeout_ms > INT32_MAX/2 || cbt->close_ms > INT32_MAX/2) { + log_warn(LD_CIRC, "Insanely large circuit build timeout value. " + "(timeout = %lfmsec, close = %lfmsec)", + cbt->timeout_ms, cbt->close_ms); } else { cbt->timeout_ms *= 2; + cbt->close_ms *= 2; } } else { - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); } control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET); @@ -1058,6 +1078,9 @@ circuit_build_times_timeout_rate(const circuit_build_times_t *cbt) } } + if (!cbt->total_build_times) + return 0; + return ((double)timeouts)/cbt->total_build_times; } @@ -1073,10 +1096,12 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, time_t start_time) { if (circuit_build_times_disabled()) { - cbt->timeout_ms = circuit_build_times_get_initial_timeout(); + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); return 0; } + // XXX: All this has changed due to close_ms circuit_build_times_network_timeout(cbt, did_onehop, start_time); /* Only count timeouts if network is live.. */ @@ -1116,7 +1141,7 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt) /* Sometimes really fast guard nodes give us such a steep curve * that this ends up being not that much greater than timeout_ms. * Make it be at least 1 min to handle this case. */ - cbt->close_ms = MAX(cbt->close_ms, 60*1000); + cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout()); cbt->have_computed_timeout = 1; return 1; @@ -1139,6 +1164,11 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) log_warn(LD_CIRC, "Set buildtimeout to low value %lfms. Setting to %dms", cbt->timeout_ms, circuit_build_times_min_timeout()); cbt->timeout_ms = circuit_build_times_min_timeout(); + if (cbt->close_ms < cbt->timeout_ms) { + /* This shouldn't happen because of MAX() in timeout_worker above, + * but doing it just in case */ + cbt->close_ms = circuit_build_times_initial_timeout(); + } } control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_COMPUTED); @@ -1771,7 +1801,7 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) * and we should discard the value. */ if (timediff < 0 || timediff > 2*circ_times.close_ms+1000) { - log_notice(LD_CIRC, "Strange value for circuit build time: %ld. " + log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. " "Assuming clock jump.", timediff); } else if (!circuit_build_times_disabled()) { circuit_build_times_add_time(&circ_times, (build_time_t)timediff); diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 5696d9c6f0..5127ca6e3d 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -378,7 +378,7 @@ circuit_expire_building(time_t now) */ if (now - victim->timestamp_created > 2*circ_times.close_ms/1000+1) { log_notice(LD_CIRC, - "Extremely large value for circuit build timeout: %ld. " + "Extremely large value for circuit build timeout: %lds. " "Assuming clock jump.", now - victim->timestamp_created); } else if (circuit_build_times_add_timeout(&circ_times, first_hop_succeeded, @@ -715,6 +715,9 @@ circuit_expire_old_circuits_clientside(time_t now) circ->n_circ_id, (int)(now - circ->timestamp_dirty), circ->purpose); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); + // XXX: Do we ever mark non-dirty odd-purpose circuits for close? + // XXX: See irc backlog. Want to log for those circuits not mentioned. + // But remember to add flag. this is called 1x/sec } else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN && circ->purpose == CIRCUIT_PURPOSE_C_GENERAL) { diff --git a/src/or/or.h b/src/or/or.h index 8e8ea18694..1f1ffe00eb 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3117,6 +3117,9 @@ typedef struct { /** Timeout gathering is suspended if non-zero. The old timeout value * is stored here in that case. */ double suspended_timeout; + /** Timeout gathering is suspended if non-zero. The old close value + * is stored here in that case. */ + double suspended_close_timeout; } network_liveness_t; /** Structure for circuit build times history */ -- cgit v1.2.3-54-g00ecf From c6c8fbf852bb2ecee6387609e8cbf78a772c7e01 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 15 Jun 2010 01:13:49 -0700 Subject: Split the circuit timeout and close codepaths. We need to record different statistics at point of timeout, vs the point of forcible closing. Also, give some better names to constants and state file variables to indicate they are not dealing with timeouts, but abandoned circuits. --- src/or/circuitbuild.c | 118 +++++++++++++++++++++++++++++++------------------- src/or/circuituse.c | 4 +- src/or/config.c | 2 +- src/or/or.h | 13 ++++-- src/test/test.c | 37 +++++++--------- 5 files changed, 103 insertions(+), 71 deletions(-) (limited to 'src/or') diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index b1335cc24c..f244aeaff0 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -290,7 +290,7 @@ circuit_build_times_init(circuit_build_times_t *cbt) } /** - * Rewind our timeout history by n timeout positions. + * Rewind our build time history by n positions. */ static void circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) @@ -317,7 +317,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n) } /** - * Add a new timeout value time to the set of build times. Time + * Add a new build time value time to the set of build times. Time * units are milliseconds. * * circuit_build_times cbt is a circular array, so loop around when @@ -359,7 +359,7 @@ circuit_build_times_max(circuit_build_times_t *cbt) build_time_t max_build_time = 0; for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] > max_build_time - && cbt->circuit_build_times[i] != CBT_BUILD_TIMEOUT) + && cbt->circuit_build_times[i] != CBT_BUILD_ABANDONED) max_build_time = cbt->circuit_build_times[i]; } return max_build_time; @@ -407,7 +407,7 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, // calculate histogram for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] == 0 - || cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT) + || cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) continue; /* 0 <-> uninitialized */ c = (cbt->circuit_build_times[i] / CBT_BIN_WIDTH); @@ -489,11 +489,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, *next = NULL; state->TotalBuildTimes = cbt->total_build_times; - state->CircuitBuildTimeoutCount = 0; + state->CircuitBuildAbandonedCount = 0; for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] == CBT_BUILD_TIMEOUT) - state->CircuitBuildTimeoutCount++; + if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) + state->CircuitBuildAbandonedCount++; } for (i = 0; i < nbins; i++) { @@ -567,7 +567,7 @@ circuit_build_times_filter_timeouts(circuit_build_times_t *cbt) if (cbt->circuit_build_times[i] > max_timeout) { build_time_t replaced = cbt->circuit_build_times[i]; num_filtered++; - cbt->circuit_build_times[i] = CBT_BUILD_TIMEOUT; + cbt->circuit_build_times[i] = CBT_BUILD_ABANDONED; log_debug(LD_CIRC, "Replaced timeout %d with %d", replaced, cbt->circuit_build_times[i]); @@ -644,7 +644,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, break; } - if (loaded_cnt+count+state->CircuitBuildTimeoutCount + if (loaded_cnt+count+state->CircuitBuildAbandonedCount > state->TotalBuildTimes) { log_warn(LD_CIRC, "Too many build times in state file. " @@ -665,9 +665,9 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, } log_info(LD_CIRC, - "Adding %d timeouts.", state->CircuitBuildTimeoutCount); - for (i=0; i < state->CircuitBuildTimeoutCount; i++) { - loaded_times[loaded_cnt++] = CBT_BUILD_TIMEOUT; + "Adding %d timeouts.", state->CircuitBuildAbandonedCount); + for (i=0; i < state->CircuitBuildAbandonedCount; i++) { + loaded_times[loaded_cnt++] = CBT_BUILD_ABANDONED; } if (loaded_cnt != state->TotalBuildTimes) { @@ -693,7 +693,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, circuit_build_times_set_timeout(cbt); - if (!state->CircuitBuildTimeoutCount && cbt->total_build_times) { + if (!state->CircuitBuildAbandonedCount && cbt->total_build_times) { circuit_build_times_filter_timeouts(cbt); } @@ -715,7 +715,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) { build_time_t *x=cbt->circuit_build_times; double a = 0; - int n=0,i=0,timeout_count=0; + int n=0,i=0,abandoned_count=0; build_time_t max_time=0; /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ @@ -732,8 +732,8 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) if (x[i] < cbt->Xm) { a += tor_mathlog(cbt->Xm); - } else if (x[i] == CBT_BUILD_TIMEOUT) { - timeout_count++; + } else if (x[i] == CBT_BUILD_ABANDONED) { + abandoned_count++; } else { a += tor_mathlog(x[i]); if (x[i] > max_time) @@ -750,13 +750,13 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) tor_assert(max_time > 0); - a += timeout_count*tor_mathlog(max_time); + a += abandoned_count*tor_mathlog(max_time); a -= n*tor_mathlog(cbt->Xm); // Estimator comes from Eq #4 in: // "Bayesian estimation based on trimmed samples from Pareto populations" // by Arturo J. Fernández. We are right-censored only. - a = (n-timeout_count)/a; + a = (n-abandoned_count)/a; cbt->alpha = a; } @@ -907,16 +907,27 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt) } /** - * A circuit just timed out. If there has been no recent network activity - * at all, but this circuit was launched back when we thought the network - * was live, increment the number of "nonlive" circuit timeouts. - * - * Also distinguish between whether it failed before the first hop - * and record that in our history for later deciding if the network has - * changed. + * A circuit just timed out. If it failed after the first hop, record it + * in our history for later deciding if the network speed has changed. */ static void circuit_build_times_network_timeout(circuit_build_times_t *cbt, + int did_onehop) +{ + if (did_onehop) { + cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1; + cbt->liveness.after_firsthop_idx++; + cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs; + } +} + +/** + * A circuit was just forcibly closed. If there has been no recent network + * activity at all, but this circuit was launched back when we thought the + * network was live, increment the number of "nonlive" circuit timeouts. + */ +static void +circuit_build_times_network_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { time_t now = time(NULL); @@ -940,11 +951,6 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt, start_time, now); } cbt->liveness.nonlive_timeouts++; - } else if (did_onehop) { - /* Count a one-hop timeout */ - cbt->liveness.timeouts_after_firsthop[cbt->liveness.after_firsthop_idx]=1; - cbt->liveness.after_firsthop_idx++; - cbt->liveness.after_firsthop_idx %= cbt->liveness.num_recent_circs; } } @@ -1010,8 +1016,6 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt) * Also resets the entire timeout history in this case and causes us * to restart the process of building test circuits and estimating a * new timeout. - * - * XXX: All this may have been broken by the close_ms change! */ int circuit_build_times_network_check_changed(circuit_build_times_t *cbt) @@ -1091,7 +1095,7 @@ circuit_build_times_timeout_rate(const circuit_build_times_t *cbt) * update our timeout estimate. */ int -circuit_build_times_add_timeout(circuit_build_times_t *cbt, +circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time) { @@ -1101,22 +1105,38 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt, return 0; } - // XXX: All this has changed due to close_ms - circuit_build_times_network_timeout(cbt, did_onehop, start_time); + /* Record this force-close to help determine if the network is dead */ + circuit_build_times_network_close(cbt, did_onehop, start_time); /* Only count timeouts if network is live.. */ if (!circuit_build_times_network_check_live(cbt)) { return 0; } - /* If there are a ton of timeouts, we should reduce - * the circuit build timeout */ - if (circuit_build_times_network_check_changed(cbt)) { - return 0; + circuit_build_times_add_time(cbt, CBT_BUILD_ABANDONED); + return 1; +} + +/** + * Update timeout counts to determine if we need to expire + * our build time history due to excessive timeouts. + */ +void +circuit_build_times_count_timeout(circuit_build_times_t *cbt, + int did_onehop) +{ + if (circuit_build_times_disabled()) { + cbt->close_ms = cbt->timeout_ms + = circuit_build_times_get_initial_timeout(); + return; } - circuit_build_times_add_time(cbt, CBT_BUILD_TIMEOUT); - return 1; + circuit_build_times_network_timeout(cbt, did_onehop); + + /* If there are a ton of timeouts, we should reset + * the circuit build timeout. + */ + circuit_build_times_network_check_changed(cbt); } /** @@ -1804,9 +1824,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. " "Assuming clock jump.", timediff); } else if (!circuit_build_times_disabled()) { - circuit_build_times_add_time(&circ_times, (build_time_t)timediff); - circuit_build_times_network_circ_success(&circ_times); - circuit_build_times_set_timeout(&circ_times); + /* Don't count circuit times if the network was not live */ + if (circuit_build_times_network_check_live(&circ_times)) { + circuit_build_times_add_time(&circ_times, (build_time_t)timediff); + circuit_build_times_set_timeout(&circ_times); + } + + if (circ->_base.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { + circuit_build_times_network_circ_success(&circ_times); + } } } log_info(LD_CIRC,"circuit built!"); @@ -1829,6 +1855,10 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) } circuit_rep_hist_note_result(circ); circuit_has_opened(circ); /* do other actions as necessary */ + + /* We're done with measurement circuits here. Just close them */ + if (circ->_base.purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) + circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_FINISHED); return 0; } diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 5127ca6e3d..5027eb5a28 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -368,6 +368,8 @@ circuit_expire_building(time_t now) * Switch their purpose and wait. */ if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) { victim->purpose = CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT; + circuit_build_times_count_timeout(&circ_times, + first_hop_succeeded); continue; } @@ -380,7 +382,7 @@ circuit_expire_building(time_t now) log_notice(LD_CIRC, "Extremely large value for circuit build timeout: %lds. " "Assuming clock jump.", now - victim->timestamp_created); - } else if (circuit_build_times_add_timeout(&circ_times, + } else if (circuit_build_times_count_close(&circ_times, first_hop_succeeded, victim->timestamp_created)) { circuit_build_times_set_timeout(&circ_times); diff --git a/src/or/config.c b/src/or/config.c index c3b4fc933e..bcdfbd717a 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -421,7 +421,7 @@ static config_var_t _state_vars[] = { V(LastWritten, ISOTIME, NULL), V(TotalBuildTimes, UINT, NULL), - V(CircuitBuildTimeoutCount, UINT, "0"), + V(CircuitBuildAbandonedCount, UINT, "0"), VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), diff --git a/src/or/or.h b/src/or/or.h index 1f1ffe00eb..e1483e6466 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2842,7 +2842,7 @@ typedef struct { /** Build time histogram */ config_line_t * BuildtimeHistogram; unsigned int TotalBuildTimes; - unsigned int CircuitBuildTimeoutCount; + unsigned int CircuitBuildAbandonedCount; /** What version of Tor wrote this state file? */ char *TorVersion; @@ -3027,7 +3027,12 @@ void entry_guards_free_all(void); /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; -#define CBT_BUILD_TIMEOUT ((build_time_t)(INT32_MAX-1)) + +/** + * CBT_BUILD_ABANDONED is our flag value to represent a force-closed + * circuit (Aka a 'right-censored' pareto value). + */ +#define CBT_BUILD_ABANDONED ((build_time_t)(INT32_MAX-1)) #define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) /** Save state every 10 circuits */ @@ -3152,7 +3157,9 @@ void circuit_build_times_update_state(circuit_build_times_t *cbt, or_state_t *state); int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg); -int circuit_build_times_add_timeout(circuit_build_times_t *cbt, +void circuit_build_times_count_timeout(circuit_build_times_t *cbt, + int did_onehop); +int circuit_build_times_count_close(circuit_build_times_t *cbt, int did_onehop, time_t start_time); void circuit_build_times_set_timeout(circuit_build_times_t *cbt); int circuit_build_times_add_time(circuit_build_times_t *cbt, diff --git a/src/test/test.c b/src/test/test.c index 6cfb2f76b6..fbcfedbc9a 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -520,20 +520,20 @@ test_circuit_timeout(void) int build_times_idx = 0; int total_build_times = 0; - final.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE; - estimate.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE; + final.close_ms = final.timeout_ms = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE; + estimate.close_ms = estimate.timeout_ms + = CBT_DEFAULT_TIMEOUT_INITIAL_VALUE; for (i = 0; i < CBT_DEFAULT_RECENT_CIRCUITS*2; i++) { circuit_build_times_network_circ_success(&estimate); circuit_build_times_add_time(&estimate, circuit_build_times_generate_sample(&estimate, 0, CBT_DEFAULT_QUANTILE_CUTOFF/100.0)); - estimate.have_computed_timeout = 1; + circuit_build_times_network_circ_success(&estimate); circuit_build_times_add_time(&final, circuit_build_times_generate_sample(&final, 0, CBT_DEFAULT_QUANTILE_CUTOFF/100.0)); - final.have_computed_timeout = 1; } test_assert(!circuit_build_times_network_check_changed(&estimate)); @@ -549,26 +549,22 @@ test_circuit_timeout(void) test_assert(circuit_build_times_network_check_live(&estimate)); test_assert(circuit_build_times_network_check_live(&final)); - if (circuit_build_times_add_timeout(&estimate, 0, - (time_t)(approx_time()-estimate.timeout_ms/1000.0-1))) - estimate.have_computed_timeout = 1; - if (circuit_build_times_add_timeout(&final, 0, - (time_t)(approx_time()-final.timeout_ms/1000.0-1))) - final.have_computed_timeout = 1; + circuit_build_times_count_close(&estimate, 0, + (time_t)(approx_time()-estimate.close_ms/1000.0-1)); + circuit_build_times_count_close(&final, 0, + (time_t)(approx_time()-final.close_ms/1000.0-1)); } test_assert(!circuit_build_times_network_check_live(&estimate)); test_assert(!circuit_build_times_network_check_live(&final)); for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) { - if (circuit_build_times_add_timeout(&estimate, 0, - (time_t)(approx_time()-estimate.timeout_ms/1000.0-1))) - estimate.have_computed_timeout = 1; + circuit_build_times_count_close(&estimate, 0, + (time_t)(approx_time()-estimate.close_ms/1000.0-1)); if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) { - if (circuit_build_times_add_timeout(&final, 0, - (time_t)(approx_time()-final.timeout_ms/1000.0-1))) - final.have_computed_timeout = 1; + circuit_build_times_count_close(&final, 0, + (time_t)(approx_time()-final.close_ms/1000.0-1)); } } @@ -592,12 +588,10 @@ test_circuit_timeout(void) circuit_build_times_network_is_live(&final); for (i = 0; i < CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT; i++) { - if (circuit_build_times_add_timeout(&estimate, 1, approx_time()-1)) - estimate.have_computed_timeout = 1; + circuit_build_times_count_timeout(&estimate, 1); if (i < CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT-1) { - if (circuit_build_times_add_timeout(&final, 1, approx_time()-1)) - final.have_computed_timeout = 1; + circuit_build_times_count_timeout(&final, 1); } } @@ -608,8 +602,7 @@ test_circuit_timeout(void) test_assert(circuit_build_times_network_check_live(&estimate)); test_assert(circuit_build_times_network_check_live(&final)); - if (circuit_build_times_add_timeout(&final, 1, approx_time()-1)) - final.have_computed_timeout = 1; + circuit_build_times_count_timeout(&final, 1); } done: -- cgit v1.2.3-54-g00ecf From 82922ea45a24e7f79ac4c1ddf4d9de02c604546d Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 15 Jun 2010 14:46:01 -0700 Subject: Be more proactive about closing unused circuits. We need to ensure that we close timeout measurement circuits. While we're at it, we should close really old circuits of certain types that aren't in use, and log really old circuits of other types. --- src/or/circuituse.c | 30 ++++++++++++++++++++---------- src/or/or.h | 3 +++ 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'src/or') diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 5027eb5a28..0187448aaa 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -717,17 +717,27 @@ circuit_expire_old_circuits_clientside(time_t now) circ->n_circ_id, (int)(now - circ->timestamp_dirty), circ->purpose); circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); - // XXX: Do we ever mark non-dirty odd-purpose circuits for close? - // XXX: See irc backlog. Want to log for those circuits not mentioned. - // But remember to add flag. this is called 1x/sec - } else if (!circ->timestamp_dirty && - circ->state == CIRCUIT_STATE_OPEN && - circ->purpose == CIRCUIT_PURPOSE_C_GENERAL) { + } else if (!circ->timestamp_dirty && circ->state == CIRCUIT_STATE_OPEN) { if (circ->timestamp_created < cutoff) { - log_debug(LD_CIRC, - "Closing circuit that has been unused for %d seconds.", - (int)(now - circ->timestamp_created)); - circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); + if (circ->purpose == CIRCUIT_PURPOSE_C_GENERAL || + circ->purpose == CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT || + circ->purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO || + circ->purpose == CIRCUIT_PURPOSE_TESTING || + (circ->purpose >= CIRCUIT_PURPOSE_C_INTRODUCING && + circ->purpose <= CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED) || + circ->purpose == CIRCUIT_PURPOSE_S_CONNECT_REND) { + log_debug(LD_CIRC, + "Closing circuit that has been unused for %d seconds.", + (int)(now - circ->timestamp_created)); + circuit_mark_for_close(circ, END_CIRC_REASON_FINISHED); + } else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) { + log_notice(LD_CIRC, + "Ancient non-dirty circuit %d is still around after " + "%ld seconds.", + TO_ORIGIN_CIRCUIT(circ)->global_identifier, + now - circ->timestamp_created); + TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1; + } } } } diff --git a/src/or/or.h b/src/or/or.h index e1483e6466..cd6841522b 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2157,6 +2157,9 @@ typedef struct origin_circuit_t { * to the specification? */ unsigned int remaining_relay_early_cells : 4; + /** Set if this circuit insanely old and if we already informed the user */ + unsigned int is_ancient : 1; + /** What commands were sent over this circuit that decremented the * RELAY_EARLY counter? This is for debugging task 878. */ uint8_t relay_early_commands[MAX_RELAY_EARLY_CELLS_PER_CIRCUIT]; -- cgit v1.2.3-54-g00ecf From 2abe1ceccfacaa8deca3b7bb4caeb550572efd7f Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 15 Jun 2010 18:02:19 -0700 Subject: Add CLOSE_MS and CLOSE_RATE keywords to buildtimeout event. --- doc/spec/control-spec.txt | 8 ++++++-- src/or/circuitbuild.c | 19 +++++++++++++++++++ src/or/control.c | 6 ++++-- src/or/or.h | 1 + 4 files changed, 30 insertions(+), 4 deletions(-) (limited to 'src/or') diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt index 1938fa226c..419db3a715 100644 --- a/doc/spec/control-spec.txt +++ b/doc/spec/control-spec.txt @@ -1670,14 +1670,18 @@ The syntax is: "650" SP "BUILDTIMEOUT_SET" SP Type SP "TOTAL_TIMES=" Total SP "TIMEOUT_MS=" Timeout SP "XM=" Xm SP "ALPHA=" Alpha SP - "CUTOFF_QUANTILE=" Quantile SP "TIMEOUT_RATE=" Rate CRLF + "CUTOFF_QUANTILE=" Quantile SP "TIMEOUT_RATE=" TimeoutRate SP + "CLOSE_MS=" CloseTimeout SP "CLOSE_RATE=" CloseRate + CRLF Type = "COMPUTED" / "RESET" / "SUSPENDED" / "DISCARD" / "RESUME" Total = Integer count of timeouts stored Timeout = Integer timeout in milliseconds Xm = Estimated integer Pareto parameter Xm in milliseconds Alpha = Estimated floating point Paredo paremter alpha Quantile = Floating point CDF quantile cutoff point for this timeout - Rate = Floating point ratio of circuits that timeout + TimeoutRate = Floating point ratio of circuits that timeout + CloseTimeout = How long to keep measurement circs in milliseconds + CloseRate = Floating point ratio of measurement circuits that are closed A new circuit build timeout time has been set. If Type is "COMPUTED", Tor has computed the value based on historical data. If Type is "RESET", diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index f244aeaff0..ddc7da4b38 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -1088,6 +1088,25 @@ circuit_build_times_timeout_rate(const circuit_build_times_t *cbt) return ((double)timeouts)/cbt->total_build_times; } +/** + * Count the number of closed circuits in a set of cbt data. + */ +double +circuit_build_times_close_rate(const circuit_build_times_t *cbt) +{ + int i=0,closed=0; + for (i = 0; i < CBT_NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == CBT_BUILD_ABANDONED) { + closed++; + } + } + + if (!cbt->total_build_times) + return 0; + + return ((double)closed)/cbt->total_build_times; +} + /** * Store a timeout as a synthetic value. * diff --git a/src/or/control.c b/src/or/control.c index b5cc6c7b6d..8cfebe6477 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -3530,11 +3530,13 @@ control_event_buildtimeout_set(const circuit_build_times_t *cbt, send_control_event(EVENT_BUILDTIMEOUT_SET, ALL_FORMATS, "650 BUILDTIMEOUT_SET %s TOTAL_TIMES=%lu " "TIMEOUT_MS=%lu XM=%lu ALPHA=%lf CUTOFF_QUANTILE=%lf " - "TIMEOUT_RATE=%lf\r\n", + "TIMEOUT_RATE=%lf CLOSE_MS=%lu CLOSE_RATE=%lf\r\n", type_string, (unsigned long)cbt->total_build_times, (unsigned long)cbt->timeout_ms, (unsigned long)cbt->Xm, cbt->alpha, qnt, - circuit_build_times_timeout_rate(cbt)); + circuit_build_times_timeout_rate(cbt), + (unsigned long)cbt->close_ms, + circuit_build_times_close_rate(cbt)); return 0; } diff --git a/src/or/or.h b/src/or/or.h index cd6841522b..65468cfe38 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3173,6 +3173,7 @@ void circuit_build_times_init(circuit_build_times_t *cbt); void circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, networkstatus_t *ns); double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt); +double circuit_build_times_close_rate(const circuit_build_times_t *cbt); #ifdef CIRCUIT_PRIVATE double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, -- cgit v1.2.3-54-g00ecf From 5dbf99d9ff59e69c064acda31495486635f8b844 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 16 Jun 2010 18:13:27 -0700 Subject: Mention the purpose field for leaked circuits. --- src/or/circuituse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/or') diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 0187448aaa..c1b39b18e7 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -733,9 +733,10 @@ circuit_expire_old_circuits_clientside(time_t now) } else if (!TO_ORIGIN_CIRCUIT(circ)->is_ancient) { log_notice(LD_CIRC, "Ancient non-dirty circuit %d is still around after " - "%ld seconds.", + "%ld seconds. Purpose: %d", TO_ORIGIN_CIRCUIT(circ)->global_identifier, - now - circ->timestamp_created); + now - circ->timestamp_created, + circ->purpose); TO_ORIGIN_CIRCUIT(circ)->is_ancient = 1; } } -- cgit v1.2.3-54-g00ecf