diff options
author | Mike Perry <mikeperry-git@torproject.org> | 2020-11-06 16:30:16 -0600 |
---|---|---|
committer | Mike Perry <mikeperry-git@torproject.org> | 2021-02-18 11:21:25 -0600 |
commit | ed9d60cb92efbf5bc2a3c22ade6ffee4e1c8116a (patch) | |
tree | db6ccc30557aa4229b3b26e12c4b0afe7df7f7a4 /src | |
parent | 406400a74d917ec997d26afdf4ae97d9826820fe (diff) | |
download | tor-ed9d60cb92efbf5bc2a3c22ade6ffee4e1c8116a.tar.gz tor-ed9d60cb92efbf5bc2a3c22ade6ffee4e1c8116a.zip |
Fix Xm mode calculation to properly average N=10 modes.
This is still fast enough. ~100usec on my laptop with 1000 build times.
Diffstat (limited to 'src')
-rw-r--r-- | src/core/or/circuitstats.c | 61 | ||||
-rw-r--r-- | src/core/or/circuitstats.h | 2 |
2 files changed, 26 insertions, 37 deletions
diff --git a/src/core/or/circuitstats.c b/src/core/or/circuitstats.c index d92f35fdc2..57d45d2240 100644 --- a/src/core/or/circuitstats.c +++ b/src/core/or/circuitstats.c @@ -853,58 +853,49 @@ circuit_build_times_create_histogram(const circuit_build_times_t *cbt, * Return the Pareto start-of-curve parameter Xm. * * Because we are not a true Pareto curve, we compute this as the - * weighted average of the N most frequent build time bins. N is either - * 1 if we don't have enough circuit build time data collected, or - * determined by the consensus parameter cbtnummodes (default 3). + * weighted average of the 10 most frequent build time bins. This + * heuristic allowed for the actual timeout rate to be closest + * to the chosen quantile cutoff, for quantiles 60-80%, out of + * many variant approaches (see #40157 for analysis). */ static build_time_t circuit_build_times_get_xm(circuit_build_times_t *cbt) { - build_time_t i, nbins; + build_time_t nbins = 0; build_time_t *nth_max_bin; - int32_t bin_counts=0; - build_time_t ret = 0; - uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); - int n=0; + build_time_t xm_total = 0; + build_time_t Xm = 0; + int32_t xm_counts=0; int num_modes = circuit_build_times_default_num_xm_modes(); + uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); tor_assert(nbins > 0); tor_assert(num_modes > 0); - // Only use one mode if < 1000 buildtimes. Not enough data - // for multiple. - if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE) - num_modes = 1; - nth_max_bin = tor_calloc(num_modes, sizeof(build_time_t)); - /* Determine the N most common build times */ - for (i = 0; i < nbins; i++) { - if (histogram[i] >= histogram[nth_max_bin[0]]) { - nth_max_bin[0] = i; - } - - for (n = 1; n < num_modes; n++) { - if (histogram[i] >= histogram[nth_max_bin[n]] && - (!histogram[nth_max_bin[n-1]] - || histogram[i] < histogram[nth_max_bin[n-1]])) { + /* Determine the N most common build times, by selecting the + * nth largest mode, counting it, and removing it from the histogram. */ + for (int n = 0; n < num_modes; n++) { + /* Get nth mode */ + for (build_time_t i = 0; i < nbins; i++) { + if (histogram[i] > histogram[nth_max_bin[n]]) { nth_max_bin[n] = i; } } - } - for (n = 0; n < num_modes; n++) { - bin_counts += histogram[nth_max_bin[n]]; - ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]]; - log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]), - histogram[nth_max_bin[n]]); + /* Update average */ + xm_counts += histogram[nth_max_bin[n]]; + xm_total += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]]; + + /* Prevent from re-counting this value */ + histogram[nth_max_bin[n]] = 0; } - /* bin_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE + /* xm_counts can become zero if all of our last CBT_NCIRCUITS_TO_OBSERVE * circuits were abandoned before they completed. This shouldn't happen, * though. We should have reset/re-learned a lower timeout first. */ - if (bin_counts == 0) { - ret = 0; + if (xm_counts == 0) { log_warn(LD_CIRC, "No valid circuit build time data out of %d times, %u modes, " "have_timeout=%d, %lfms", cbt->total_build_times, num_modes, @@ -912,15 +903,13 @@ circuit_build_times_get_xm(circuit_build_times_t *cbt) goto done; } - tor_assert(bin_counts > 0); - - ret /= bin_counts; + Xm = xm_total / xm_counts; done: tor_free(histogram); tor_free(nth_max_bin); - return ret; + return Xm; } /** diff --git a/src/core/or/circuitstats.h b/src/core/or/circuitstats.h index 15fe23641e..864b3b5f43 100644 --- a/src/core/or/circuitstats.h +++ b/src/core/or/circuitstats.h @@ -58,7 +58,7 @@ void circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ); #define CBT_BIN_WIDTH ((build_time_t)10) /** Number of modes to use in the weighted-avg computation of Xm */ -#define CBT_DEFAULT_NUM_XM_MODES 3 +#define CBT_DEFAULT_NUM_XM_MODES 10 #define CBT_MIN_NUM_XM_MODES 1 #define CBT_MAX_NUM_XM_MODES 20 |