diff options
author | Mike Perry <mikeperry-git@fscked.org> | 2010-05-04 14:42:37 -0700 |
---|---|---|
committer | Mike Perry <mikeperry-git@fscked.org> | 2010-05-10 12:46:49 -0700 |
commit | cc2a48f1be7a3bcd7e31b969a62ed1279c521682 (patch) | |
tree | a5aa6ea29acfa309a003d5ab39849e5f3624cb3b /src | |
parent | 0c030b0b2fe03b68d03f6579c1c622d16ad27119 (diff) | |
download | tor-cc2a48f1be7a3bcd7e31b969a62ed1279c521682.tar.gz tor-cc2a48f1be7a3bcd7e31b969a62ed1279c521682.zip |
Bug 1335: Alter Xm calculation to be weighted avg of top N=3 modes.
In my state files, I was seeing several peaks, probably due to different
guards having different latency. This change is meant to better capture
this behavior and generate more reasonable timeouts when it happens. It
is improving the timeout values for my collection of state files.
Diffstat (limited to 'src')
-rw-r--r-- | src/or/circuitbuild.c | 50 | ||||
-rw-r--r-- | src/or/or.h | 3 |
2 files changed, 43 insertions, 10 deletions
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 233d60f15c..98f6baf2bd 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -20,6 +20,8 @@ #define MIN(a,b) ((a)<(b)?(a):(b)) #endif +#define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2)) + /********* START VARIABLES **********/ /** Global list of circuit build times */ // FIXME: Add this as a member for entry_guard_t instead of global? @@ -387,25 +389,53 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, } /** - * Return the most frequent build time (rounded to CBT_BIN_WIDTH ms). + * Return the Pareto start-of-curve parameter Xm. * - * Ties go in favor of the slower time. + * Because we are not a true Pareto curve, we compute this as the + * weighted average of the N=3 most frequent build time bins. */ static build_time_t -circuit_build_times_mode(circuit_build_times_t *cbt) +circuit_build_times_get_xm(circuit_build_times_t *cbt) { - build_time_t i, nbins, max_bin=0; + build_time_t i, nbins; + build_time_t nth_max_bin[CBT_NUM_XM_MODES]; + int32_t bin_counts=0; + build_time_t ret = 0; uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); + int n=0; + int num_modes = CBT_NUM_XM_MODES; + + // Only use one mode if < 1000 buildtimes. Not enough data + // for multiple. + if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE) + num_modes = 1; + memset(nth_max_bin, 0, sizeof(nth_max_bin)); for (i = 0; i < nbins; i++) { - if (histogram[i] >= histogram[max_bin]) { - max_bin = i; + if (histogram[i] >= histogram[nth_max_bin[0]]) { + nth_max_bin[0] = i; } + + for (n = 1; n < num_modes; n++) { + if (histogram[i] >= histogram[nth_max_bin[n]] && + (!histogram[nth_max_bin[n-1]] + || histogram[i] < histogram[nth_max_bin[n-1]])) { + nth_max_bin[n] = i; + } + } + } + + for (n = 0; n < num_modes; n++) { + bin_counts += histogram[nth_max_bin[n]]; + ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]]; + log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]), + histogram[nth_max_bin[n]]); } + ret /= bin_counts; tor_free(histogram); - return max_bin*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2; + return ret; } /** @@ -436,7 +466,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, line->key = tor_strdup("CircuitBuildTimeBin"); line->value = tor_malloc(25); tor_snprintf(line->value, 25, "%d %d", - i*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2, histogram[i]); + CBT_BIN_TO_MS(i), histogram[i]); next = &(line->next); } @@ -596,7 +626,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ /* We sort of cheat here and make our samples slightly more pareto-like * and less frechet-like. */ - cbt->Xm = circuit_build_times_mode(cbt); + cbt->Xm = circuit_build_times_get_xm(cbt); for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) { if (!x[i]) { @@ -763,7 +793,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) (cbt->pre_timeouts+cbt->total_build_times); /* Make sure it doesn't exceed the synthetic max */ timeout_quantile *= CBT_MAX_SYNTHETIC_QUANTILE; - cbt->Xm = circuit_build_times_mode(cbt); + cbt->Xm = circuit_build_times_get_xm(cbt); tor_assert(cbt->Xm > 0); /* Use current timeout to get an estimate on alpha */ circuit_build_times_initial_alpha(cbt, timeout_quantile, diff --git a/src/or/or.h b/src/or/or.h index 9c613d28d1..0d35de1be0 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3023,6 +3023,9 @@ void entry_guards_free_all(void); /** Width of the histogram bins in milliseconds */ #define CBT_BIN_WIDTH ((build_time_t)50) +/** Number of modes to use in the weighted-avg computation of Xm */ +#define CBT_NUM_XM_MODES (3) + /** A build_time_t is milliseconds */ typedef uint32_t build_time_t; #define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) |