From 7750bee21dda817611afd936558834bb21411301 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 25 Aug 2009 17:13:12 -0700 Subject: Clean up Fallon's partially complete GSoC project. The code actually isn't that bad. It's a shame she didn't finish. Using it as the base for this feature. --- src/or/circuitbuild.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/or/circuitlist.c | 1 + src/or/config.c | 17 ++++++ src/or/or.h | 18 ++++++ 4 files changed, 196 insertions(+) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index ef9d24c853..7334308403 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -12,6 +12,11 @@ #include "or.h" /********* START VARIABLES **********/ +/** Global list of circuit build times */ +// XXX: Make this a smartlist.. +uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; +int build_times_idx = 0; +int total_build_times = 0; /** A global list of all circuits at this hop. */ extern circuit_t *global_circuitlist; @@ -60,6 +65,156 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); static time_t start_of_month(time_t when); +static int circuit_build_times_add_time(time_t time); +/** circuit_build_times is a circular array, so loop around when + * array is full + * + * time units are milliseconds + */ +static +int +circuit_build_times_add_time(long time) +{ + if(time > UINT16_MAX) { + log_notice(LD_CIRC, + "Circuit build time of %dms exceeds max. Capping at 65536ms", time); + time = UINT16_MAX; + } + circuit_build_times[build_times_idx] = time; + build_times_idx = (build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; + if(total_build_times + 1 < NCIRCUITS_TO_OBSERVE) + total_build_times++; + + return 0; +} + +/** + * Calculate histogram + */ +void +circuit_build_times_create_histogram(uint16_t * histogram) +{ + int i, c; + // calculate histogram + for(i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if(circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ + + c = (circuit_build_times[i] / BUILDTIME_BIN_WIDTH); + histogram[c]++; + } +} + +/** + * Find maximum circuit build time + */ +uint16_t +circuit_build_times_max() +{ + int i = 0, max_build_time = 0; + for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if(circuit_build_times[i] > max_build_time) + max_build_time = circuit_build_times[i]; + } + return max_build_time; +} + +uint16_t +circuit_build_times_min() +{ + int i = 0; + uint16_t min_build_time = UINT16_MAX; + for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if(circuit_build_times[i] && /* 0 <-> uninitialized */ + circuit_build_times[i] < min_build_time) + min_build_time = circuit_build_times[i]; + } + return min_build_time; +} + +/** + * output a histogram of current circuit build times + */ +void +circuit_build_times_update_state(or_state_t * state) +{ + uint16_t max_build_time = 0, *histogram; + int i = 0, nbins = 0; + config_line_t **next, *line; + + max_build_time = circuit_build_times_max(); + nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); + histogram = tor_malloc_zero(nbins * sizeof(uint16_t)); + + circuit_build_times_create_histogram(histogram); + // write to state + config_free_lines(state->BuildtimeHistogram); + next = &state->BuildtimeHistogram; + *next = NULL; + + state->TotalBuildTimes = total_build_times; + + // total build times? + for(i = 0; i < nbins; i++) { + if(histogram[i] == 0) continue; // compress the histogram by skipping the blanks + *next = line = tor_malloc_zero(sizeof(config_line_t)); + line->key = tor_strdup("CircuitBuildTimeBin"); + line->value = tor_malloc(20); + tor_snprintf(line->value, 20, "%d %d", i*BUILDTIME_BIN_WIDTH, + histogram[i]); + next = &(line->next); + } + if(!get_options()->AvoidDiskWrites) + or_state_mark_dirty(get_or_state(), 0); + + if(histogram) tor_free(histogram); +} + +int +find_next_available(int chosen) +{// find index of next open slot in circuit_build_times + int idx = 0; + for(idx = (chosen + 1) % NCIRCUITS_TO_OBSERVE; idx < chosen; + idx = ((idx + 1 ) % NCIRCUITS_TO_OBSERVE)) { + if(circuit_build_times[idx] == 0) { + return idx; + } + } + return 0; +} + +/** Load histogram from state */ +int +circuit_build_times_parse_state(or_state_t *state, char **msg) +{ + config_line_t *line; + msg = NULL; + memset(circuit_build_times, 0, NCIRCUITS_TO_OBSERVE); + total_build_times = state->TotalBuildTimes; + + for(line = state->BuildtimeHistogram; line; line = line->next) { + smartlist_t * args = smartlist_create(); + smartlist_split_string(args, line->value, " ", + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0); + if(smartlist_len(args) < 2) { + *msg = tor_strdup("Unable to parse circuit build times: " + "Too few arguments to CircuitBuildTIme"); + break; + } else { + uint16_t ms, count, i; + /* XXX: use tor_strtol */ + ms = atol(smartlist_get(args,0)); + count = atol(smartlist_get(args,1)); + for(i = 0; i < count; i++) { + circuit_build_times_add_time(ms); + } + } + } + return (msg ? -1 : 0); +} + + + + /** Iterate over values of circ_id, starting from conn-\>next_circ_id, * and with the high bit specified by conn-\>circ_id_type, until we get * a circ_id that is not in use by any other circuit on that conn. @@ -641,8 +796,13 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) log_debug(LD_CIRC,"starting to send subsequent skin."); hop = onion_next_hop_in_cpath(circ->cpath); if (!hop) { + struct timeval end; + tor_gettimeofday(&end); /* done building the circuit. whew. */ circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN); + circuit_build_times_add_time(tor_mdiff(&circ->_base.timestamp_created, + &end)); + circuit_build_times_recompute(); log_info(LD_CIRC,"circuit built!"); circuit_reset_failure_count(0); if (circ->build_state->onehop_tunnel) diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index e1da117168..54bda94001 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -379,6 +379,7 @@ static void init_circuit_base(circuit_t *circ) { circ->timestamp_created = time(NULL); + tor_gettimeofday(&circ->highres_created); circ->package_window = circuit_initial_package_window(); circ->deliver_window = CIRCWINDOW_START; diff --git a/src/or/config.c b/src/or/config.c index d830229d3b..0345ca7281 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -409,6 +409,11 @@ static config_var_t _state_vars[] = { V(LastRotatedOnionKey, ISOTIME, NULL), V(LastWritten, ISOTIME, NULL), + VAR("TotalBuildTimes", UINT, TotalBuildTimes, NULL), + VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), + VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), + + { NULL, CONFIG_TYPE_OBSOLETE, 0, NULL } }; @@ -597,6 +602,10 @@ static config_var_description_t options_description[] = { /* Hidden service options: HiddenService: dir,excludenodes, nodes, * options, port. PublishHidServDescriptor */ + /* Circuit build time histogram options */ + { "CircuitBuildTimeBin", "Histogram of recent circuit build times"}, + { "TotalBuildTimes", "Total number of buildtimes in histogram"}, + /* Nonpersistent options: __LeaveStreamsUnattached, __AllDirActionsPrivate */ { NULL, NULL }, }; @@ -5060,6 +5069,13 @@ or_state_set(or_state_t *new_state) log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err); tor_free(err); } + + if(circuit_build_times_parse_state(global_state, &err) < 0) { + log_warn(LD_GENERAL,"%s",err); + tor_free(err); + + } + } /** Reload the persistent state from disk, generating a new state as needed. @@ -5192,6 +5208,7 @@ or_state_save(time_t now) * to avoid redundant writes. */ entry_guards_update_state(global_state); rep_hist_update_state(global_state); + circuit_build_times_update_state(global_state); if (accounting_is_enabled(get_options())) accounting_run_housekeeping(now); diff --git a/src/or/or.h b/src/or/or.h index 8587ea61fc..be54ab4bf7 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1884,6 +1884,13 @@ typedef struct crypt_path_t { DH_KEY_LEN) #define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN) +// XXX: Do we want to artifically tweak CircuitIdleTimeout and +// the number of circuits we build at a time if < MIN here? +#define MIN_CIRCUITS_TO_OBSERVE 1000 +#define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */ +#define BUILDTIME_BIN_WIDTH 50 + + /** Information used to build a circuit. */ typedef struct { /** Intended length of the final circuit. */ @@ -1977,6 +1984,7 @@ typedef struct circuit_t { time_t timestamp_created; /**< When was this circuit created? */ time_t timestamp_dirty; /**< When the circuit was first used, or 0 if the * circuit is clean. */ + struct timeval highres_created; /**< When exactly was this circuit created? */ uint16_t marked_for_close; /**< Should we close this circuit at the end of * the main loop? (If true, holds the line number @@ -2683,6 +2691,11 @@ typedef struct { int BWHistoryWriteInterval; smartlist_t *BWHistoryWriteValues; + /** Build time histogram */ + config_line_t * BuildtimeHistogram; + uint16_t TotalBuildTimes; + + /** What version of Tor wrote this state file? */ char *TorVersion; @@ -2852,6 +2865,11 @@ void bridges_retry_all(void); void entry_guards_free_all(void); +void circuit_build_times_update_state(or_state_t *state); +int circuit_build_times_parse_state(or_state_t *state, char **msg); + + + /********************************* circuitlist.c ***********************/ circuit_t * _circuit_get_global_list(void); -- cgit v1.2.3-54-g00ecf From 04414830fe199d80bddf67c64e17d32d54a385e4 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 27 Aug 2009 01:46:06 -0700 Subject: Implement the pareto fitting and timeout calculating bits. --- src/or/Makefile.am | 4 +- src/or/circuitbuild.c | 285 +++++++++++++++++++++++++++++++++++++------------- src/or/circuituse.c | 2 + src/or/config.c | 7 +- src/or/or.h | 28 +++-- 5 files changed, 244 insertions(+), 82 deletions(-) diff --git a/src/or/Makefile.am b/src/or/Makefile.am index 7d6c9eb0b9..e9916d5188 100644 --- a/src/or/Makefile.am +++ b/src/or/Makefile.am @@ -41,14 +41,14 @@ AM_CPPFLAGS = -DSHARE_DATADIR="\"$(datadir)\"" \ tor_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ @TOR_LDFLAGS_libevent@ tor_LDADD = ../common/libor.a ../common/libor-crypto.a \ ../common/libor-event.a \ - -lz -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@ + -lz -lm -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@ test_SOURCES = $(COMMON_SRC) test_data.c test.c test_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ \ @TOR_LDFLAGS_libevent@ test_LDADD = ../common/libor.a ../common/libor-crypto.a \ ../common/libor-event.a \ - -lz -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@ + -lz -lm -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@ noinst_HEADERS = or.h eventdns.h eventdns_tor.h micro-revision.i diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 7334308403..582567b7ee 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -9,14 +9,23 @@ * \brief The actual details of building circuits. **/ +#include + +long int lround(double x); + +double +ln(double x) +{ + return log(x); +} +#undef log + #include "or.h" +#include "crypto.h" /********* START VARIABLES **********/ /** Global list of circuit build times */ -// XXX: Make this a smartlist.. -uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; -int build_times_idx = 0; -int total_build_times = 0; +circuit_build_times_t circ_times; /** A global list of all circuits at this hop. */ extern circuit_t *global_circuitlist; @@ -65,25 +74,24 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); static time_t start_of_month(time_t when); -static int circuit_build_times_add_time(time_t time); -/** circuit_build_times is a circular array, so loop around when - * array is full +/** + * circuit_build_times is a circular array, so loop around when + * array is full * * time units are milliseconds */ -static int -circuit_build_times_add_time(long time) +circuit_build_times_add_time(circuit_build_times_t *cbt, long time) { - if(time > UINT16_MAX) { + if (time > UINT16_MAX) { log_notice(LD_CIRC, - "Circuit build time of %dms exceeds max. Capping at 65536ms", time); + "Circuit build time of %ldms exceeds max. Capping at 65536ms", time); time = UINT16_MAX; } - circuit_build_times[build_times_idx] = time; - build_times_idx = (build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; - if(total_build_times + 1 < NCIRCUITS_TO_OBSERVE) - total_build_times++; + cbt->circuit_build_times[cbt->build_times_idx] = time; + cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; + if (cbt->total_build_times + 1 < NCIRCUITS_TO_OBSERVE) + cbt->total_build_times++; return 0; } @@ -91,15 +99,16 @@ circuit_build_times_add_time(long time) /** * Calculate histogram */ -void -circuit_build_times_create_histogram(uint16_t * histogram) +static void +circuit_build_times_create_histogram(circuit_build_times_t *cbt, + uint16_t *histogram) { - int i, c; - // calculate histogram - for(i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { - if(circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ + int i, c; + // calculate histogram + for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ - c = (circuit_build_times[i] / BUILDTIME_BIN_WIDTH); + c = (cbt->circuit_build_times[i] / BUILDTIME_BIN_WIDTH); histogram[c]++; } } @@ -107,26 +116,29 @@ circuit_build_times_create_histogram(uint16_t * histogram) /** * Find maximum circuit build time */ -uint16_t -circuit_build_times_max() +static uint16_t +circuit_build_times_max(circuit_build_times_t *cbt) { int i = 0, max_build_time = 0; - for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { - if(circuit_build_times[i] > max_build_time) - max_build_time = circuit_build_times[i]; + for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] > max_build_time) + max_build_time = cbt->circuit_build_times[i]; } return max_build_time; } -uint16_t -circuit_build_times_min() +static uint16_t +circuit_build_times_min(circuit_build_times_t *cbt) { int i = 0; uint16_t min_build_time = UINT16_MAX; - for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { - if(circuit_build_times[i] && /* 0 <-> uninitialized */ - circuit_build_times[i] < min_build_time) - min_build_time = circuit_build_times[i]; + for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */ + cbt->circuit_build_times[i] < min_build_time) + min_build_time = cbt->circuit_build_times[i]; + } + if (min_build_time == UINT16_MAX) { + log_warn(LD_CIRC, "No build times less than UIN16_MAX!"); } return min_build_time; } @@ -135,85 +147,218 @@ circuit_build_times_min() * output a histogram of current circuit build times */ void -circuit_build_times_update_state(or_state_t * state) +circuit_build_times_update_state(circuit_build_times_t *cbt, + or_state_t *state) { uint16_t max_build_time = 0, *histogram; int i = 0, nbins = 0; config_line_t **next, *line; - max_build_time = circuit_build_times_max(); + max_build_time = circuit_build_times_max(cbt); nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); histogram = tor_malloc_zero(nbins * sizeof(uint16_t)); - circuit_build_times_create_histogram(histogram); + circuit_build_times_create_histogram(cbt, histogram); // write to state config_free_lines(state->BuildtimeHistogram); next = &state->BuildtimeHistogram; *next = NULL; - state->TotalBuildTimes = total_build_times; + state->TotalBuildTimes = cbt->total_build_times; // total build times? - for(i = 0; i < nbins; i++) { - if(histogram[i] == 0) continue; // compress the histogram by skipping the blanks + for (i = 0; i < nbins; i++) { + // compress the histogram by skipping the blanks + if (histogram[i] == 0) continue; *next = line = tor_malloc_zero(sizeof(config_line_t)); line->key = tor_strdup("CircuitBuildTimeBin"); - line->value = tor_malloc(20); - tor_snprintf(line->value, 20, "%d %d", i*BUILDTIME_BIN_WIDTH, + line->value = tor_malloc(25); + tor_snprintf(line->value, 25, "%d %d", i*BUILDTIME_BIN_WIDTH, histogram[i]); next = &(line->next); } - if(!get_options()->AvoidDiskWrites) + if (!get_options()->AvoidDiskWrites) or_state_mark_dirty(get_or_state(), 0); - if(histogram) tor_free(histogram); -} - -int -find_next_available(int chosen) -{// find index of next open slot in circuit_build_times - int idx = 0; - for(idx = (chosen + 1) % NCIRCUITS_TO_OBSERVE; idx < chosen; - idx = ((idx + 1 ) % NCIRCUITS_TO_OBSERVE)) { - if(circuit_build_times[idx] == 0) { - return idx; - } - } - return 0; + if (histogram) tor_free(histogram); } /** Load histogram from state */ int -circuit_build_times_parse_state(or_state_t *state, char **msg) +circuit_build_times_parse_state(circuit_build_times_t *cbt, + or_state_t *state, char **msg) { + int tot_values = 0, lines = 0; config_line_t *line; msg = NULL; - memset(circuit_build_times, 0, NCIRCUITS_TO_OBSERVE); - total_build_times = state->TotalBuildTimes; + memset(cbt->circuit_build_times, 0, NCIRCUITS_TO_OBSERVE); + cbt->total_build_times = state->TotalBuildTimes; - for(line = state->BuildtimeHistogram; line; line = line->next) { + for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t * args = smartlist_create(); smartlist_split_string(args, line->value, " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0); - if(smartlist_len(args) < 2) { + if (smartlist_len(args) < 2) { *msg = tor_strdup("Unable to parse circuit build times: " "Too few arguments to CircuitBuildTIme"); break; } else { - uint16_t ms, count, i; - /* XXX: use tor_strtol */ - ms = atol(smartlist_get(args,0)); - count = atol(smartlist_get(args,1)); - for(i = 0; i < count; i++) { - circuit_build_times_add_time(ms); + const char *ms_str = smartlist_get(args,0); + const char *count_str = smartlist_get(args,1); + uint32_t count, i; + uint16_t ms; + int ok; + ms = tor_parse_ulong(ms_str, 0, 0, UINT16_MAX, &ok, NULL); + if (!ok) { + *msg = tor_strdup("Unable to parse circuit build times: " + "Unparsable bin number"); + break; + } + count = tor_parse_ulong(count_str, 0, 0, UINT32_MAX, &ok, NULL); + if (!ok) { + *msg = tor_strdup("Unable to parse circuit build times: " + "Unparsable bin count"); + break; + } + lines++; + for (i = 0; i < count; i++) { + circuit_build_times_add_time(cbt, ms); + tot_values++; } } } + log_info(LD_CIRC, "Loaded %d values from %d lines in circuit time histogram", + tot_values, lines); + circuit_build_times_set_timeout(cbt); return (msg ? -1 : 0); } +static void +circuit_build_times_update_alpha(circuit_build_times_t *cbt) +{ + uint16_t *x=cbt->circuit_build_times; + double a = 0; + int n=0,i=0; + /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ + cbt->Xm = circuit_build_times_min(cbt); + + for (i=0; i< NCIRCUITS_TO_OBSERVE; i++) { + if (!x[i]) continue; + a += ln(x[i]); + n++; + } + tor_assert(n==cbt->total_build_times); + a -= n*ln(cbt->Xm); + a = n/a; + + cbt->alpha = a; +} +/** + * This is the Pareto Quantile Function. It calculates the point x + * in the distribution such that F(x) < quantile (ie quantile*100% + * of the mass of the density function is below x on the curve). + * + * We use it to calculate the timeout and also synthetic values of + * time for circuits that timeout before completion. + * + * See http://en.wikipedia.org/wiki/Quantile_function, + * http://en.wikipedia.org/wiki/Inverse_transform_sampling and + * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation + * That's right. I'll cite wikipedia all day long. + */ +static double +circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, + double quantile) +{ + return cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); +} + +static void +circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt) +{ + /* Generate 0.8-1.0... */ + uint64_t r = crypto_rand_uint64(UINT64_MAX-1); + double u = BUILDTIMEOUT_QUANTILE_CUTOFF + + ((1.0-BUILDTIMEOUT_QUANTILE_CUTOFF)*r)/(1.0*UINT64_MAX); + + long gentime = lround(circuit_build_times_calculate_timeout(cbt, u)); + + if (gentime < get_options()->CircuitBuildTimeout*1000) { + log_warn(LD_CIRC, + "Generated a synthetic timeout LESS than the current timeout: %ld vs %d", + gentime, get_options()->CircuitBuildTimeout*1000); + tor_assert(gentime >= get_options()->CircuitBuildTimeout*1000); + } else if (gentime > UINT16_MAX) { + gentime = UINT16_MAX; + log_info(LD_CIRC, + "Generated a synthetic timeout LESS than the current timeout: %ld vs %d", + gentime, get_options()->CircuitBuildTimeout*1000); + } else { + log_info(LD_CIRC, "Generated synthetic time %ld for timeout", + gentime); + } + + circuit_build_times_add_time(cbt, gentime); +} + +/** + * Store a timeout as a synthetic value + */ +void +circuit_build_times_add_timeout(circuit_build_times_t *cbt) +{ + if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { + /* Store a timeout before we have enough data as special */ + cbt->pre_timeouts++; + return; + } + + /* Store a timeout as a random position on this curve */ + if (cbt->pre_timeouts && get_options()->CircuitBuildTimeout != 60) { + cbt->Xm = circuit_build_times_min(cbt); + // Use current timeout to get an estimate on alpha + // Q(u) = Xm/((1-u)^(1/a)) + // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout + // CircBuildTimeout = Xm/((1-0.8))^(1/a)) + // CircBuildTimeout = Xm*((1-0.8))^(-1/a)) + // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a) + // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a + cbt->alpha = -ln(1-BUILDTIMEOUT_QUANTILE_CUTOFF)/ + (ln(get_options()->CircuitBuildTimeout)-ln(cbt->Xm)); + while (cbt->pre_timeouts-- != 0) { + circuit_build_times_add_timeout_worker(cbt); + } + } + + cbt->pre_timeouts = 0; + circuit_build_times_add_timeout_worker(cbt); +} + +void +circuit_build_times_set_timeout(circuit_build_times_t *cbt) +{ + double timeout; + + if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { + log_info(LD_CIRC, + "Not enough circuits yet to calculate a new build timeout." + " Need %d more.", + MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times); + return; + } + + circuit_build_times_update_alpha(cbt); + timeout = circuit_build_times_calculate_timeout(cbt, + BUILDTIMEOUT_QUANTILE_CUTOFF); + + get_options()->CircuitBuildTimeout = lround(timeout/1000.0); + + log_info(LD_CIRC, + "Set circuit build timeout to %d based on %d circuit times", + get_options()->CircuitBuildTimeout, cbt->total_build_times); +} /** Iterate over values of circ_id, starting from conn-\>next_circ_id, * and with the high bit specified by conn-\>circ_id_type, until we get @@ -800,9 +945,9 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) tor_gettimeofday(&end); /* done building the circuit. whew. */ circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN); - circuit_build_times_add_time(tor_mdiff(&circ->_base.timestamp_created, - &end)); - circuit_build_times_recompute(); + circuit_build_times_add_time(&circ_times, + tv_mdiff(&circ->_base.highres_created, &end)); + circuit_build_times_set_timeout(&circ_times); log_info(LD_CIRC,"circuit built!"); circuit_reset_failure_count(0); if (circ->build_state->onehop_tunnel) diff --git a/src/or/circuituse.c b/src/or/circuituse.c index ee2d0bbabf..e93d28df72 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -358,6 +358,8 @@ circuit_expire_building(time_t now) circuit_log_path(LOG_INFO,LD_CIRC,TO_ORIGIN_CIRCUIT(victim)); circuit_mark_for_close(victim, END_CIRC_REASON_TIMEOUT); + circuit_build_times_add_timeout(&circ_times); + circuit_build_times_set_timeout(&circ_times); } } diff --git a/src/or/config.c b/src/or/config.c index 0345ca7281..39a4ac139d 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -413,7 +413,6 @@ static config_var_t _state_vars[] = { VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), - { NULL, CONFIG_TYPE_OBSOLETE, 0, NULL } }; @@ -2923,7 +2922,7 @@ compute_publishserverdescriptor(or_options_t *options) /** Lowest allowable value for CircuitBuildTimeout; values too low will * increase network load because of failing connections being retried, and * might prevent users from connecting to the network at all. */ -#define MIN_CIRCUIT_BUILD_TIMEOUT 30 +#define MIN_CIRCUIT_BUILD_TIMEOUT 5 /** Lowest allowable value for MaxCircuitDirtiness; if this is too low, Tor * will generate too many circuits and potentially overload the network. */ @@ -5070,7 +5069,7 @@ or_state_set(or_state_t *new_state) tor_free(err); } - if(circuit_build_times_parse_state(global_state, &err) < 0) { + if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) { log_warn(LD_GENERAL,"%s",err); tor_free(err); @@ -5208,7 +5207,7 @@ or_state_save(time_t now) * to avoid redundant writes. */ entry_guards_update_state(global_state); rep_hist_update_state(global_state); - circuit_build_times_update_state(global_state); + circuit_build_times_update_state(&circ_times, global_state); if (accounting_is_enabled(get_options())) accounting_run_housekeeping(now); diff --git a/src/or/or.h b/src/or/or.h index be54ab4bf7..5e94a56e7c 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1890,6 +1890,8 @@ typedef struct crypt_path_t { #define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */ #define BUILDTIME_BIN_WIDTH 50 +/* TODO: This should be moved to the consensus */ +#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 /** Information used to build a circuit. */ typedef struct { @@ -1984,7 +1986,7 @@ typedef struct circuit_t { time_t timestamp_created; /**< When was this circuit created? */ time_t timestamp_dirty; /**< When the circuit was first used, or 0 if the * circuit is clean. */ - struct timeval highres_created; /**< When exactly was this circuit created? */ + struct timeval highres_created; /**< When exactly was the circuit created? */ uint16_t marked_for_close; /**< Should we close this circuit at the end of * the main loop? (If true, holds the line number @@ -2695,7 +2697,6 @@ typedef struct { config_line_t * BuildtimeHistogram; uint16_t TotalBuildTimes; - /** What version of Tor wrote this state file? */ char *TorVersion; @@ -2865,10 +2866,25 @@ void bridges_retry_all(void); void entry_guards_free_all(void); -void circuit_build_times_update_state(or_state_t *state); -int circuit_build_times_parse_state(or_state_t *state, char **msg); - - +/* Circuit Build Timeout "public" functions (I love C... No wait.) */ +typedef struct { + // XXX: Make this a smartlist.. + uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; + int build_times_idx; + int total_build_times; + int pre_timeouts; + uint16_t Xm; + double alpha; +} circuit_build_times_t; + +extern circuit_build_times_t circ_times; +void circuit_build_times_update_state(circuit_build_times_t *cbt, + or_state_t *state); +int circuit_build_times_parse_state(circuit_build_times_t *cbt, + or_state_t *state, char **msg); +void circuit_build_times_add_timeout(circuit_build_times_t *cbt); +void circuit_build_times_set_timeout(circuit_build_times_t *cbt); +int circuit_build_times_add_time(circuit_build_times_t *cbt, long time); /********************************* circuitlist.c ***********************/ -- cgit v1.2.3-54-g00ecf From b52bce91fc25d29f1d1a7b5a32076eadd7005d2f Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 27 Aug 2009 23:28:20 -0700 Subject: Write unit tests and fix issues they uncovered. --- src/or/circuitbuild.c | 158 ++++++++++++++++++++++++++++++++++---------------- src/or/config.c | 2 +- src/or/or.h | 45 +++++++++----- src/or/test.c | 67 +++++++++++++++++++++ 4 files changed, 206 insertions(+), 66 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 582567b7ee..ee9dce2b38 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -9,9 +9,13 @@ * \brief The actual details of building circuits. **/ +// XXX: Move this noise to common/compat.c? #include +// also use pow() + long int lround(double x); +double ln(double x); double ln(double x) @@ -20,6 +24,8 @@ ln(double x) } #undef log +#define CIRCUIT_PRIVATE + #include "or.h" #include "crypto.h" @@ -81,16 +87,19 @@ static time_t start_of_month(time_t when); * time units are milliseconds */ int -circuit_build_times_add_time(circuit_build_times_t *cbt, long time) +circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) { - if (time > UINT16_MAX) { + if (time > BUILD_TIME_MAX) { log_notice(LD_CIRC, - "Circuit build time of %ldms exceeds max. Capping at 65536ms", time); - time = UINT16_MAX; + "Circuit build time of %ums exceeds max. Capping at 65536ms", time); + time = BUILD_TIME_MAX; + } else if (time <= 0) { + log_err(LD_CIRC, "Circuit build time is %u!", time); + return -1; } cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; - if (cbt->total_build_times + 1 < NCIRCUITS_TO_OBSERVE) + if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE) cbt->total_build_times++; return 0; @@ -101,7 +110,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, long time) */ static void circuit_build_times_create_histogram(circuit_build_times_t *cbt, - uint16_t *histogram) + build_time_t *histogram) { int i, c; // calculate histogram @@ -116,10 +125,11 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, /** * Find maximum circuit build time */ -static uint16_t +static build_time_t circuit_build_times_max(circuit_build_times_t *cbt) { - int i = 0, max_build_time = 0; + int i = 0; + build_time_t max_build_time = 0; for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] > max_build_time) max_build_time = cbt->circuit_build_times[i]; @@ -127,36 +137,39 @@ circuit_build_times_max(circuit_build_times_t *cbt) return max_build_time; } -static uint16_t +static build_time_t circuit_build_times_min(circuit_build_times_t *cbt) { int i = 0; - uint16_t min_build_time = UINT16_MAX; + build_time_t min_build_time = BUILD_TIME_MAX; for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */ cbt->circuit_build_times[i] < min_build_time) min_build_time = cbt->circuit_build_times[i]; } - if (min_build_time == UINT16_MAX) { - log_warn(LD_CIRC, "No build times less than UIN16_MAX!"); + if (min_build_time == BUILD_TIME_MAX) { + log_warn(LD_CIRC, "No build times less than BUILD_TIME_MAX!"); } return min_build_time; } /** - * output a histogram of current circuit build times + * output a histogram of current circuit build times. + * + * XXX: Is do_unit the right way to do this unit test + * noise? */ void circuit_build_times_update_state(circuit_build_times_t *cbt, - or_state_t *state) + or_state_t *state, int do_unit) { - uint16_t max_build_time = 0, *histogram; + build_time_t max_build_time = 0, *histogram; int i = 0, nbins = 0; config_line_t **next, *line; max_build_time = circuit_build_times_max(cbt); nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); - histogram = tor_malloc_zero(nbins * sizeof(uint16_t)); + histogram = tor_malloc_zero(nbins * sizeof(build_time_t)); circuit_build_times_create_histogram(cbt, histogram); // write to state @@ -177,8 +190,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, histogram[i]); next = &(line->next); } - if (!get_options()->AvoidDiskWrites) - or_state_mark_dirty(get_or_state(), 0); + + if (!do_unit) { + if (!get_options()->AvoidDiskWrites) + or_state_mark_dirty(get_or_state(), 0); + } if (histogram) tor_free(histogram); } @@ -191,8 +207,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0, lines = 0; config_line_t *line; msg = NULL; - memset(cbt->circuit_build_times, 0, NCIRCUITS_TO_OBSERVE); - cbt->total_build_times = state->TotalBuildTimes; + memset(cbt, 0, sizeof(*cbt)); for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t * args = smartlist_create(); @@ -206,9 +221,9 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, const char *ms_str = smartlist_get(args,0); const char *count_str = smartlist_get(args,1); uint32_t count, i; - uint16_t ms; + build_time_t ms; int ok; - ms = tor_parse_ulong(ms_str, 0, 0, UINT16_MAX, &ok, NULL); + ms = tor_parse_ulong(ms_str, 0, 0, BUILD_TIME_MAX, &ok, NULL); if (!ok) { *msg = tor_strdup("Unable to parse circuit build times: " "Unparsable bin number"); @@ -233,10 +248,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, return (msg ? -1 : 0); } -static void +void circuit_build_times_update_alpha(circuit_build_times_t *cbt) { - uint16_t *x=cbt->circuit_build_times; + build_time_t *x=cbt->circuit_build_times; double a = 0; int n=0,i=0; @@ -248,6 +263,10 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) a += ln(x[i]); n++; } + if (n!=cbt->total_build_times) { + log_err(LD_CIRC, "Discrepency in build times count: %d vs %d", n, + cbt->total_build_times); + } tor_assert(n==cbt->total_build_times); a -= n*ln(cbt->Xm); a = n/a; @@ -257,7 +276,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) /** * This is the Pareto Quantile Function. It calculates the point x - * in the distribution such that F(x) < quantile (ie quantile*100% + * in the distribution such that F(x) = quantile (ie quantile*100% * of the mass of the density function is below x on the curve). * * We use it to calculate the timeout and also synthetic values of @@ -265,50 +284,92 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) * * See http://en.wikipedia.org/wiki/Quantile_function, * http://en.wikipedia.org/wiki/Inverse_transform_sampling and - * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation + * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_random_sample_from_Pareto_distribution * That's right. I'll cite wikipedia all day long. */ -static double +double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, double quantile) { - return cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); + double ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); + if (ret > INT32_MAX) { + ret = INT32_MAX; + } + tor_assert(ret > 0); + return ret; +} + +/* Pareto CDF */ +double +circuit_build_times_cdf(circuit_build_times_t *cbt, + double x) +{ + double ret = 1.0-pow(cbt->Xm/x,cbt->alpha); + tor_assert(0 <= ret && ret <= 1.0); + return ret; +} + +build_time_t +circuit_build_times_generate_sample(circuit_build_times_t *cbt, + double q_lo, double q_hi) +{ + uint32_t r = crypto_rand_int(UINT32_MAX-1); + double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT32_MAX); + build_time_t ret; + + tor_assert(0 <= u && u < 1.0); + ret = lround(circuit_build_times_calculate_timeout(cbt, u)); + tor_assert(ret > 0); + return ret; } static void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt) { /* Generate 0.8-1.0... */ - uint64_t r = crypto_rand_uint64(UINT64_MAX-1); - double u = BUILDTIMEOUT_QUANTILE_CUTOFF + - ((1.0-BUILDTIMEOUT_QUANTILE_CUTOFF)*r)/(1.0*UINT64_MAX); - - long gentime = lround(circuit_build_times_calculate_timeout(cbt, u)); + build_time_t gentime = circuit_build_times_generate_sample(cbt, + BUILDTIMEOUT_QUANTILE_CUTOFF, 1.0); - if (gentime < get_options()->CircuitBuildTimeout*1000) { + if (gentime < (build_time_t)get_options()->CircuitBuildTimeout*1000) { log_warn(LD_CIRC, - "Generated a synthetic timeout LESS than the current timeout: %ld vs %d", + "Generated a synthetic timeout LESS than the current timeout: %u vs %d", gentime, get_options()->CircuitBuildTimeout*1000); - tor_assert(gentime >= get_options()->CircuitBuildTimeout*1000); - } else if (gentime > UINT16_MAX) { - gentime = UINT16_MAX; + tor_assert(gentime >= + (build_time_t)get_options()->CircuitBuildTimeout*1000); + } else if (gentime > BUILD_TIME_MAX) { + gentime = BUILD_TIME_MAX; log_info(LD_CIRC, - "Generated a synthetic timeout LESS than the current timeout: %ld vs %d", - gentime, get_options()->CircuitBuildTimeout*1000); + "Generated a synthetic timeout larger than the max: %u", gentime); } else { - log_info(LD_CIRC, "Generated synthetic time %ld for timeout", - gentime); + log_info(LD_CIRC, "Generated synthetic time %u for timeout", gentime); } circuit_build_times_add_time(cbt, gentime); } +void +circuit_build_times_initial_alpha(circuit_build_times_t *cbt, + double quantile, build_time_t timeout) +{ + // Q(u) = Xm/((1-u)^(1/a)) + // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout + // CircBuildTimeout = Xm/((1-0.8))^(1/a)) + // CircBuildTimeout = Xm*((1-0.8))^(-1/a)) + // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a) + // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a + cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout)); +} + /** * Store a timeout as a synthetic value */ void circuit_build_times_add_timeout(circuit_build_times_t *cbt) { + /* XXX: If there are a ton of timeouts, we should reduce + * the circuit build timeout by like 2X or something... + * But then how do we differentiate between that and network + * failure? */ if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { /* Store a timeout before we have enough data as special */ cbt->pre_timeouts++; @@ -316,17 +377,12 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) } /* Store a timeout as a random position on this curve */ - if (cbt->pre_timeouts && get_options()->CircuitBuildTimeout != 60) { + if (cbt->pre_timeouts) { cbt->Xm = circuit_build_times_min(cbt); // Use current timeout to get an estimate on alpha - // Q(u) = Xm/((1-u)^(1/a)) - // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout - // CircBuildTimeout = Xm/((1-0.8))^(1/a)) - // CircBuildTimeout = Xm*((1-0.8))^(-1/a)) - // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a) - // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a - cbt->alpha = -ln(1-BUILDTIMEOUT_QUANTILE_CUTOFF)/ - (ln(get_options()->CircuitBuildTimeout)-ln(cbt->Xm)); + circuit_build_times_initial_alpha(cbt, + 1.0-((double)cbt->pre_timeouts)/cbt->total_build_times, + get_options()->CircuitBuildTimeout*1000); while (cbt->pre_timeouts-- != 0) { circuit_build_times_add_timeout_worker(cbt); } diff --git a/src/or/config.c b/src/or/config.c index 39a4ac139d..45ce5cf13b 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -5207,7 +5207,7 @@ or_state_save(time_t now) * to avoid redundant writes. */ entry_guards_update_state(global_state); rep_hist_update_state(global_state); - circuit_build_times_update_state(&circ_times, global_state); + circuit_build_times_update_state(&circ_times, global_state, 0); if (accounting_is_enabled(get_options())) accounting_run_housekeeping(now); diff --git a/src/or/or.h b/src/or/or.h index 5e94a56e7c..13626c4d85 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1884,15 +1884,6 @@ typedef struct crypt_path_t { DH_KEY_LEN) #define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN) -// XXX: Do we want to artifically tweak CircuitIdleTimeout and -// the number of circuits we build at a time if < MIN here? -#define MIN_CIRCUITS_TO_OBSERVE 1000 -#define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */ -#define BUILDTIME_BIN_WIDTH 50 - -/* TODO: This should be moved to the consensus */ -#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 - /** Information used to build a circuit. */ typedef struct { /** Intended length of the final circuit. */ @@ -2866,25 +2857,51 @@ void bridges_retry_all(void); void entry_guards_free_all(void); -/* Circuit Build Timeout "public" functions (I love C... No wait.) */ +/* Circuit Build Timeout "public" functions and structures. + * (I love C... No wait.) */ + +// XXX: Do we want to artifically tweak CircuitIdleTimeout and +// the number of circuits we build at a time if < MIN here? +#define MIN_CIRCUITS_TO_OBSERVE 500 +#define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */ +#define BUILDTIME_BIN_WIDTH 50 + +/* TODO: This should be moved to the consensus */ +#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 + +typedef uint32_t build_time_t; +#define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) + typedef struct { // XXX: Make this a smartlist.. - uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; + build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; int build_times_idx; int total_build_times; int pre_timeouts; - uint16_t Xm; + build_time_t Xm; double alpha; } circuit_build_times_t; extern circuit_build_times_t circ_times; void circuit_build_times_update_state(circuit_build_times_t *cbt, - or_state_t *state); + or_state_t *state, int do_unit); int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg); void circuit_build_times_add_timeout(circuit_build_times_t *cbt); void circuit_build_times_set_timeout(circuit_build_times_t *cbt); -int circuit_build_times_add_time(circuit_build_times_t *cbt, long time); +int circuit_build_times_add_time(circuit_build_times_t *cbt, + build_time_t time); + +#ifdef CIRCUIT_PRIVATE +double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, + double quantile); +build_time_t circuit_build_times_generate_sample(circuit_build_times_t *cbt, + double q_lo, double q_hi); +void circuit_build_times_initial_alpha(circuit_build_times_t *cbt, + double quantile, build_time_t time); +void circuit_build_times_update_alpha(circuit_build_times_t *cbt); +double circuit_build_times_cdf(circuit_build_times_t *cbt, double x); +#endif /********************************* circuitlist.c ***********************/ diff --git a/src/or/test.c b/src/or/test.c index f2cc7cc1f3..ea8ce86cfd 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -37,6 +37,9 @@ const char tor_git_revision[] = ""; #define GEOIP_PRIVATE #define MEMPOOL_PRIVATE #define ROUTER_PRIVATE +#define CIRCUIT_PRIVATE + +#include #include "or.h" #include "test.h" @@ -3404,6 +3407,69 @@ test_dirutil_param_voting(void) smartlist_free(vote3.net_params); smartlist_free(vote4.net_params); + return; +} + +static void +test_circuit_timeout(void) +{ + /* Plan: + * 1. Generate 1000 samples + * 2. Estimate parameters + * 3. If difference, repeat + * 4. Save state + * 5. load state + * 6. Estimate parameters + * 7. compare differences + */ + circuit_build_times_t initial; + circuit_build_times_t estimate; + circuit_build_times_t final; + or_state_t state; + int i; + char *msg; + double timeout1, timeout2; + memset(&initial, 0, sizeof(circuit_build_times_t)); + memset(&estimate, 0, sizeof(circuit_build_times_t)); + memset(&final, 0, sizeof(circuit_build_times_t)); + memset(&state, 0, sizeof(or_state_t)); + +#define timeout0 (30*1000.0) + initial.Xm = 750; + circuit_build_times_initial_alpha(&initial, BUILDTIMEOUT_QUANTILE_CUTOFF, + timeout0); + do { + int n = 0; + for (i=0; i < MIN_CIRCUITS_TO_OBSERVE; i++) { + if (circuit_build_times_add_time(&estimate, + circuit_build_times_generate_sample(&initial, 0, 1)) == 0) { + n++; + } + } + circuit_build_times_update_alpha(&estimate); + timeout1 = circuit_build_times_calculate_timeout(&estimate, + BUILDTIMEOUT_QUANTILE_CUTOFF); + log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm); + } while (fabs(circuit_build_times_cdf(&initial, timeout0) - + circuit_build_times_cdf(&initial, timeout1)) > 0.05 + /* 5% error */ + && estimate.total_build_times < NCIRCUITS_TO_OBSERVE); + + test_assert(estimate.total_build_times < NCIRCUITS_TO_OBSERVE); + + circuit_build_times_update_state(&estimate, &state, 1); + test_assert(circuit_build_times_parse_state(&final, &state, &msg) == 0); + + circuit_build_times_update_alpha(&final); + timeout2 = circuit_build_times_calculate_timeout(&final, + BUILDTIMEOUT_QUANTILE_CUTOFF); + log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm); + + test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) - + circuit_build_times_cdf(&initial, timeout2)) < 0.05); + +done: + return; } extern const char AUTHORITY_CERT_1[]; @@ -4931,6 +4997,7 @@ static struct { ENT(dirutil), SUBENT(dirutil, measured_bw), SUBENT(dirutil, param_voting), + ENT(circuit_timeout), ENT(v3_networkstatus), ENT(policies), ENT(rend_fns), -- cgit v1.2.3-54-g00ecf From 411b60325b9b21bb73d9b3637a726d6394b5c624 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 28 Aug 2009 02:05:02 -0700 Subject: Factor out the pretimeout handling code. We need to also call it if we're going to calculate alpha after a normal circuit build. --- src/or/circuitbuild.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index ee9dce2b38..b0bc8404b0 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -301,8 +301,7 @@ circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, /* Pareto CDF */ double -circuit_build_times_cdf(circuit_build_times_t *cbt, - double x) +circuit_build_times_cdf(circuit_build_times_t *cbt, double x) { double ret = 1.0-pow(cbt->Xm/x,cbt->alpha); tor_assert(0 <= ret && ret <= 1.0); @@ -360,6 +359,23 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout)); } +static void +circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) +{ + /* Store a timeout as a random position on this curve. */ + if (cbt->pre_timeouts) { + cbt->Xm = circuit_build_times_min(cbt); + // Use current timeout to get an estimate on alpha + circuit_build_times_initial_alpha(cbt, + 1.0-((double)cbt->pre_timeouts)/cbt->total_build_times, + get_options()->CircuitBuildTimeout*1000); + while (cbt->pre_timeouts-- != 0) { + circuit_build_times_add_timeout_worker(cbt); + } + cbt->pre_timeouts = 0; + } +} + /** * Store a timeout as a synthetic value */ @@ -376,19 +392,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) return; } - /* Store a timeout as a random position on this curve */ - if (cbt->pre_timeouts) { - cbt->Xm = circuit_build_times_min(cbt); - // Use current timeout to get an estimate on alpha - circuit_build_times_initial_alpha(cbt, - 1.0-((double)cbt->pre_timeouts)/cbt->total_build_times, - get_options()->CircuitBuildTimeout*1000); - while (cbt->pre_timeouts-- != 0) { - circuit_build_times_add_timeout_worker(cbt); - } - } - - cbt->pre_timeouts = 0; + circuit_build_times_count_pretimeouts(cbt); circuit_build_times_add_timeout_worker(cbt); } @@ -405,6 +409,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) return; } + circuit_build_times_count_pretimeouts(cbt); circuit_build_times_update_alpha(cbt); timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); -- cgit v1.2.3-54-g00ecf From 7ac9a66c8fb2ec369a7f99cc502200406f3760b2 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 31 Aug 2009 18:10:27 -0700 Subject: Recover from changing network connections. Also add code to keep creating circuits every minute until we hit our minimum threshhold. --- src/or/circuitbuild.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++-- src/or/circuituse.c | 21 ++++++++- src/or/connection_or.c | 5 ++ src/or/or.h | 24 +++++++--- src/or/test.c | 25 ++++++++++ 5 files changed, 188 insertions(+), 12 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index b0bc8404b0..a9ae139f13 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -97,6 +97,8 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) log_err(LD_CIRC, "Circuit build time is %u!", time); return -1; } + + cbt->last_circ_at = approx_time(); cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE) @@ -322,7 +324,7 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, return ret; } -static void +void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt) { /* Generate 0.8-1.0... */ @@ -376,16 +378,129 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) } } +/** + * Returns true if we need circuits to be built + */ +int +circuit_build_times_needs_circuits(circuit_build_times_t *cbt) +{ + /* Return true if < MIN_CIRCUITS_TO_OBSERVE */ + if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) + return 1; + return 0; +} + +int +circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt) +{ + return circuit_build_times_needs_circuits(cbt) && + approx_time()-cbt->last_circ_at > BUILD_TIMES_TEST_FREQUENCY; +} + +void +circuit_build_times_network_is_live(circuit_build_times_t *cbt) +{ + cbt->network_last_live = approx_time(); +} + +int +circuit_build_times_is_network_live(circuit_build_times_t *cbt) +{ + time_t now = approx_time(); + if (now - cbt->network_last_live > NETWORK_LIVE_INTERVAL) + return 0; + return 1; +} + +int +circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) +{ + double timeout_rate=0; + build_time_t Xm = BUILD_TIME_MAX; + double timeout; + int i; + + if (cbt->total_build_times < RECENT_CIRCUITS) { + return 0; + } + + /* Get timeout rate and Xm for recent circs */ + for (i = (cbt->build_times_idx - RECENT_CIRCUITS) % NCIRCUITS_TO_OBSERVE; + i != cbt->build_times_idx; + i = (i + 1) % NCIRCUITS_TO_OBSERVE) { + if (cbt->circuit_build_times[i] < Xm) { + Xm = cbt->circuit_build_times[i]; + } + if (cbt->circuit_build_times[i] > + (build_time_t)get_options()->CircuitBuildTimeout*1000) { + timeout_rate++; + } + } + timeout_rate /= RECENT_CIRCUITS; + + /* If more then 80% of our recent circuits are timing out, + * we need to re-estimate a new initial alpha and timeout */ + if (timeout_rate < MAX_RECENT_TIMEOUT_RATE) { + return 0; + } + + log_notice(LD_CIRC, + "Network connection type appears to have changed. " + "Resetting timeouts."); + + if (Xm >= (build_time_t)get_options()->CircuitBuildTimeout*1000) { + Xm = circuit_build_times_min(cbt); + if (Xm >= (build_time_t)get_options()->CircuitBuildTimeout*1000) { + /* No circuits have completed */ + get_options()->CircuitBuildTimeout *= 2; + log_warn(LD_CIRC, + "Adjusting CircuitBuildTimeout to %d in the hopes that " + "some connections will succeed", + get_options()->CircuitBuildTimeout); + goto reset; + } + } + cbt->Xm = Xm; + + circuit_build_times_initial_alpha(cbt, 1.0-timeout_rate, + get_options()->CircuitBuildTimeout*1000.0); + + timeout = circuit_build_times_calculate_timeout(cbt, + BUILDTIMEOUT_QUANTILE_CUTOFF); + + get_options()->CircuitBuildTimeout = lround(timeout/1000.0); + + log_notice(LD_CIRC, + "Set circuit build timeout to %d based on %d recent circuit times", + get_options()->CircuitBuildTimeout, RECENT_CIRCUITS); + +reset: + + /* Reset all data. Do we need a constructor? */ + memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times)); + cbt->pre_timeouts = 0; + cbt->total_build_times = 0; + cbt->build_times_idx = 0; + return 1; +} + /** * Store a timeout as a synthetic value */ void circuit_build_times_add_timeout(circuit_build_times_t *cbt) { - /* XXX: If there are a ton of timeouts, we should reduce - * the circuit build timeout by like 2X or something... - * But then how do we differentiate between that and network - * failure? */ + /* Only count timeouts if network is live.. */ + if (!circuit_build_times_is_network_live(cbt)) { + return; + } + + /* If there are a ton of timeouts, we should reduce + * the circuit build timeout */ + if (circuit_build_times_check_too_many_timeouts(cbt)) { + return; + } + if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { /* Store a timeout before we have enough data as special */ cbt->pre_timeouts++; diff --git a/src/or/circuituse.c b/src/or/circuituse.c index e93d28df72..844ea72883 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -519,6 +519,15 @@ circuit_predict_and_launch_new(void) circuit_launch_by_router(CIRCUIT_PURPOSE_C_GENERAL, NULL, flags); return; } + + /* Finally, check to see if we still need more circuits to learn + * a good build timeout */ + if (circuit_build_times_needs_circuits_now(&circ_times)) { + flags = CIRCLAUNCH_NEED_CAPACITY; + log_info(LD_CIRC, + "Have %d clean circs need another buildtime test circ.", num); + circuit_launch_by_router(CIRCUIT_PURPOSE_C_GENERAL, NULL, flags); + } } /** Build a new test circuit every 5 minutes */ @@ -633,7 +642,15 @@ static void circuit_expire_old_circuits(time_t now) { circuit_t *circ; - time_t cutoff = now - get_options()->CircuitIdleTimeout; + time_t cutoff; + + if (circuit_build_times_needs_circuits(&circ_times)) { + /* Circuits should be shorter lived if we need them + * for build time testing */ + cutoff = now - get_options()->MaxCircuitDirtiness; + } else { + cutoff = now - get_options()->CircuitIdleTimeout; + } for (circ = global_circuitlist; circ; circ = circ->next) { if (circ->marked_for_close || ! CIRCUIT_IS_ORIGIN(circ)) @@ -840,6 +857,7 @@ circuit_build_failed(origin_circuit_t *circ) break; case CIRCUIT_PURPOSE_C_INTRODUCING: /* at Alice, connecting to intro point */ + circuit_increment_failure_count(); /* Don't increment failure count, since Bob may have picked * the introduction point maliciously */ /* Alice will pick a new intro point when this one dies, if @@ -853,6 +871,7 @@ circuit_build_failed(origin_circuit_t *circ) break; case CIRCUIT_PURPOSE_S_CONNECT_REND: /* at Bob, connecting to rend point */ + circuit_increment_failure_count(); /* Don't increment failure count, since Alice may have picked * the rendezvous point maliciously */ log_info(LD_REND, diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 8c8b5496a7..aa26bf8f4b 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -1036,6 +1036,8 @@ connection_tls_finish_handshake(or_connection_t *conn) digest_rcvd) < 0) return -1; + circuit_build_times_network_is_live(&circ_times); + if (tor_tls_used_v1_handshake(conn->tls)) { conn->link_proto = 1; if (!started_here) { @@ -1087,6 +1089,7 @@ connection_or_set_state_open(or_connection_t *conn) control_event_or_conn_status(conn, OR_CONN_EVENT_CONNECTED, 0); if (started_here) { + circuit_build_times_network_is_live(&circ_times); rep_hist_note_connect_succeeded(conn->identity_digest, now); if (entry_guard_register_connect_status(conn->identity_digest, 1, 0, now) < 0) { @@ -1187,6 +1190,7 @@ connection_or_process_cells_from_inbuf(or_connection_t *conn) if (connection_fetch_var_cell_from_buf(conn, &var_cell)) { if (!var_cell) return 0; /* not yet. */ + circuit_build_times_network_is_live(&circ_times); command_process_var_cell(var_cell, conn); var_cell_free(var_cell); } else { @@ -1196,6 +1200,7 @@ connection_or_process_cells_from_inbuf(or_connection_t *conn) available? */ return 0; /* not yet */ + circuit_build_times_network_is_live(&circ_times); connection_fetch_from_buf(buf, CELL_NETWORK_SIZE, TO_CONN(conn)); /* retrieve cell info from buf (create the host-order struct from the diff --git a/src/or/or.h b/src/or/or.h index 13626c4d85..809e38572f 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2857,24 +2857,30 @@ void bridges_retry_all(void); void entry_guards_free_all(void); -/* Circuit Build Timeout "public" functions and structures. - * (I love C... No wait.) */ - -// XXX: Do we want to artifically tweak CircuitIdleTimeout and -// the number of circuits we build at a time if < MIN here? +/* Circuit Build Timeout "public" functions and structures. */ +#define RECENT_CIRCUITS 20 #define MIN_CIRCUITS_TO_OBSERVE 500 #define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */ #define BUILDTIME_BIN_WIDTH 50 +#define MAX_RECENT_TIMEOUT_RATE 0.80 + /* TODO: This should be moved to the consensus */ #define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 typedef uint32_t build_time_t; #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) +/* Have we recieved a cell in the last 90 seconds? */ +#define NETWORK_LIVE_INTERVAL 90 + +/* How often in seconds should we build a test circuit */ +#define BUILD_TIMES_TEST_FREQUENCY 60 + typedef struct { - // XXX: Make this a smartlist.. build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; + time_t network_last_live; + time_t last_circ_at; int build_times_idx; int total_build_times; int pre_timeouts; @@ -2891,6 +2897,10 @@ void circuit_build_times_add_timeout(circuit_build_times_t *cbt); void circuit_build_times_set_timeout(circuit_build_times_t *cbt); int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time); +void circuit_build_times_network_is_live(circuit_build_times_t *cbt); +int circuit_build_times_is_network_live(circuit_build_times_t *cbt); +int circuit_build_times_needs_circuits(circuit_build_times_t *cbt); +int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt); #ifdef CIRCUIT_PRIVATE double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, @@ -2901,6 +2911,8 @@ void circuit_build_times_initial_alpha(circuit_build_times_t *cbt, double quantile, build_time_t time); void circuit_build_times_update_alpha(circuit_build_times_t *cbt); double circuit_build_times_cdf(circuit_build_times_t *cbt, double x); +int circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt); +void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt); #endif /********************************* circuitlist.c ***********************/ diff --git a/src/or/test.c b/src/or/test.c index ea8ce86cfd..c6cd6a8a45 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3450,6 +3450,7 @@ test_circuit_timeout(void) timeout1 = circuit_build_times_calculate_timeout(&estimate, BUILDTIMEOUT_QUANTILE_CUTOFF); log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm); + /* XXX: 5% distribution error may not be the right metric */ } while (fabs(circuit_build_times_cdf(&initial, timeout0) - circuit_build_times_cdf(&initial, timeout1)) > 0.05 /* 5% error */ @@ -3468,6 +3469,30 @@ test_circuit_timeout(void) test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) - circuit_build_times_cdf(&initial, timeout2)) < 0.05); + /* Generate MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS timeouts + * and 1-that regular values. Then check for timeout error + * Do the same for one less timeout */ + for (i = 0; i < RECENT_CIRCUITS; i++) { + circuit_build_times_add_time(&estimate, + circuit_build_times_generate_sample(&estimate, 0, + BUILDTIMEOUT_QUANTILE_CUTOFF)); + circuit_build_times_add_time(&final, + circuit_build_times_generate_sample(&final, 0, + BUILDTIMEOUT_QUANTILE_CUTOFF)); + } + test_assert(!circuit_build_times_check_too_many_timeouts(&estimate)); + test_assert(!circuit_build_times_check_too_many_timeouts(&final)); + + for (i = 0; i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS; i++) { + circuit_build_times_add_timeout_worker(&estimate); + if (i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS-1) { + circuit_build_times_add_timeout_worker(&final); + } + } + + test_assert(circuit_build_times_check_too_many_timeouts(&estimate)); + test_assert(!circuit_build_times_check_too_many_timeouts(&final)); + done: return; } -- cgit v1.2.3-54-g00ecf From 95735e547838e05a574b55da00d3d04c1084452a Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 31 Aug 2009 23:09:54 -0700 Subject: Fix the math.h log() conflict. It was compiling, but causing segfaults. Also, adjust when the timer starts for new test circs and save state every 25 circuits. --- src/or/circuitbuild.c | 50 +++++++++++++++++++++++++++++++++++++------------- src/or/circuitlist.c | 2 ++ src/or/circuituse.c | 1 + src/or/or.h | 3 +++ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index a9ae139f13..d50748ab1f 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -9,25 +9,39 @@ * \brief The actual details of building circuits. **/ -// XXX: Move this noise to common/compat.c? -#include +#define CIRCUIT_PRIVATE -// also use pow() +#include "or.h" +#include "crypto.h" +/* + * This madness is needed because if we simply #undef log + * before including or.h or log.h, we get linker collisions + * and random segfaults due to memory corruption (and + * not even at calls to log() either!) + */ +#undef log + +/* + * Linux doesn't provide lround in math.h by default, + * but mac os does... Its best just to leave math.h + * out of the picture entirely. + */ +//#define log math_h_log +//#include +//#undef log long int lround(double x); double ln(double x); +double log(double x); +double pow(double x, double y); double ln(double x) { return log(x); } -#undef log -#define CIRCUIT_PRIVATE - -#include "or.h" -#include "crypto.h" +#define log _log /********* START VARIABLES **********/ /** Global list of circuit build times */ @@ -98,12 +112,17 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) return -1; } - cbt->last_circ_at = approx_time(); cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE) cbt->total_build_times++; + if ((cbt->total_build_times % BUILD_TIMES_SAVE_STATE_EVERY) == 0) { + /* Save state every 100 circuit builds */ + if (!get_options()->AvoidDiskWrites) + or_state_mark_dirty(get_or_state(), 0); + } + return 0; } @@ -471,8 +490,10 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) get_options()->CircuitBuildTimeout = lround(timeout/1000.0); log_notice(LD_CIRC, - "Set circuit build timeout to %d based on %d recent circuit times", - get_options()->CircuitBuildTimeout, RECENT_CIRCUITS); + "Reset circuit build timeout to %d (Xm: %d a: %lf) based on " + "%d recent circuit times", + get_options()->CircuitBuildTimeout, cbt->Xm, cbt->alpha, + RECENT_CIRCUITS); reset: @@ -532,8 +553,11 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) get_options()->CircuitBuildTimeout = lround(timeout/1000.0); log_info(LD_CIRC, - "Set circuit build timeout to %d based on %d circuit times", - get_options()->CircuitBuildTimeout, cbt->total_build_times); + "Set circuit build timeout to %d (Xm: %d a: %lf) based on " + "%d circuit times", + get_options()->CircuitBuildTimeout, cbt->Xm, cbt->alpha, + cbt->total_build_times); + } /** Iterate over values of circ_id, starting from conn-\>next_circ_id, diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index 54bda94001..259666732a 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -408,6 +408,8 @@ origin_circuit_new(void) init_circuit_base(TO_CIRCUIT(circ)); + circ_times.last_circ_at = approx_time(); + return circ; } diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 844ea72883..d53cb198a2 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -527,6 +527,7 @@ circuit_predict_and_launch_new(void) log_info(LD_CIRC, "Have %d clean circs need another buildtime test circ.", num); circuit_launch_by_router(CIRCUIT_PURPOSE_C_GENERAL, NULL, flags); + return; } } diff --git a/src/or/or.h b/src/or/or.h index 809e38572f..aeca0226a1 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2877,6 +2877,9 @@ typedef uint32_t build_time_t; /* How often in seconds should we build a test circuit */ #define BUILD_TIMES_TEST_FREQUENCY 60 +/* Save state every 25 circuits */ +#define BUILD_TIMES_SAVE_STATE_EVERY 25 + typedef struct { build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; time_t network_last_live; -- cgit v1.2.3-54-g00ecf From c4e6b3eadb53f382793af9550496c4528faea6a1 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Sep 2009 08:07:26 -0700 Subject: Fix timeout edge case when we get enough samples. Also switch Xm calculation to mode, not min. --- src/or/circuitbuild.c | 84 ++++++++++++++++++++++++++++++++++----------------- src/or/or.h | 1 + 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index d50748ab1f..6d033701ae 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -45,6 +45,7 @@ ln(double x) /********* START VARIABLES **********/ /** Global list of circuit build times */ +// FIXME: Add this as a member for entry_guard_t instead of global? circuit_build_times_t circ_times; /** A global list of all circuits at this hop. */ @@ -126,23 +127,6 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) return 0; } -/** - * Calculate histogram - */ -static void -circuit_build_times_create_histogram(circuit_build_times_t *cbt, - build_time_t *histogram) -{ - int i, c; - // calculate histogram - for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { - if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ - - c = (cbt->circuit_build_times[i] / BUILDTIME_BIN_WIDTH); - histogram[c]++; - } -} - /** * Find maximum circuit build time */ @@ -174,6 +158,46 @@ circuit_build_times_min(circuit_build_times_t *cbt) return min_build_time; } +/** + * Calculate histogram + */ +static uint32_t * +circuit_build_times_create_histogram(circuit_build_times_t *cbt, + build_time_t *nbins) +{ + uint32_t *histogram; + build_time_t max_build_time = circuit_build_times_max(cbt); + int i, c; + + *nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); + histogram = tor_malloc_zero(*nbins * sizeof(build_time_t)); + + // calculate histogram + for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { + if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */ + + c = (cbt->circuit_build_times[i] / BUILDTIME_BIN_WIDTH); + histogram[c]++; + } + + return histogram; +} + +static build_time_t +circuit_build_times_mode(circuit_build_times_t *cbt) +{ + build_time_t i, nbins, max_bin=0; + uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); + + for (i = 0; i < nbins; i++) { + if (histogram[i] > histogram[max_bin]) { + max_bin = i; + } + } + + return max_bin*BUILDTIME_BIN_WIDTH; +} + /** * output a histogram of current circuit build times. * @@ -184,15 +208,12 @@ void circuit_build_times_update_state(circuit_build_times_t *cbt, or_state_t *state, int do_unit) { - build_time_t max_build_time = 0, *histogram; - int i = 0, nbins = 0; + uint32_t *histogram; + build_time_t i = 0; + build_time_t nbins = 0; config_line_t **next, *line; - max_build_time = circuit_build_times_max(cbt); - nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH); - histogram = tor_malloc_zero(nbins * sizeof(build_time_t)); - - circuit_build_times_create_histogram(cbt, histogram); + histogram = circuit_build_times_create_histogram(cbt, &nbins); // write to state config_free_lines(state->BuildtimeHistogram); next = &state->BuildtimeHistogram; @@ -277,18 +298,25 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) int n=0,i=0; /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */ - cbt->Xm = circuit_build_times_min(cbt); + /* We sort of cheat here and make our samples slightly more pareto-like + * and less frechet-like. */ + cbt->Xm = circuit_build_times_mode(cbt); for (i=0; i< NCIRCUITS_TO_OBSERVE; i++) { if (!x[i]) continue; - a += ln(x[i]); + + // Hrmm, should we count < Xm as Xm or just drop + if (x[i] < cbt->Xm) a += ln(cbt->Xm); + else a += ln(x[i]); n++; } + if (n!=cbt->total_build_times) { log_err(LD_CIRC, "Discrepency in build times count: %d vs %d", n, cbt->total_build_times); } tor_assert(n==cbt->total_build_times); + a -= n*ln(cbt->Xm); a = n/a; @@ -502,6 +530,7 @@ reset: cbt->pre_timeouts = 0; cbt->total_build_times = 0; cbt->build_times_idx = 0; + cbt->estimated = 0; return 1; } @@ -522,7 +551,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) return; } - if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { + if (!cbt->estimated) { /* Store a timeout before we have enough data as special */ cbt->pre_timeouts++; return; @@ -550,6 +579,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); + cbt->estimated = 1; get_options()->CircuitBuildTimeout = lround(timeout/1000.0); log_info(LD_CIRC, diff --git a/src/or/or.h b/src/or/or.h index aeca0226a1..0ab382fbdd 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2889,6 +2889,7 @@ typedef struct { int pre_timeouts; build_time_t Xm; double alpha; + int estimated; } circuit_build_times_t; extern circuit_build_times_t circ_times; -- cgit v1.2.3-54-g00ecf From fca84469496e19542127a0c0f65b933a3eee4104 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Sep 2009 15:40:54 -0700 Subject: Fix a couple of assert bugs. --- src/or/circuitbuild.c | 32 ++++++++++++++++++++++---------- src/or/or.h | 7 ++++--- src/or/test.c | 6 ++++-- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 6d033701ae..d3f95254c6 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -46,6 +46,10 @@ ln(double x) /********* START VARIABLES **********/ /** Global list of circuit build times */ // FIXME: Add this as a member for entry_guard_t instead of global? +// Then we could do per-guard statistics, as guards are likely to +// vary in their own latency. The downside of this is that guards +// can change frequently, so we'd be building a lot more circuits +// most likely. circuit_build_times_t circ_times; /** A global list of all circuits at this hop. */ @@ -209,7 +213,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, or_state_t *state, int do_unit) { uint32_t *histogram; - build_time_t i = 0; + int i = 0; build_time_t nbins = 0; config_line_t **next, *line; @@ -221,8 +225,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, state->TotalBuildTimes = cbt->total_build_times; - // total build times? - for (i = 0; i < nbins; i++) { + /* Write the bins in reverse so that on startup, the faster + times are at the end. This is needed because of + the checks in circuit_build_times_check_too_many_timeouts() + which check the end of the array for recent values */ + for (i = nbins-1; i >= 0; i--) { // compress the histogram by skipping the blanks if (histogram[i] == 0) continue; *next = line = tor_malloc_zero(sizeof(config_line_t)); @@ -372,11 +379,12 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, } void -circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt) +circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, + double quantile_cutoff) { /* Generate 0.8-1.0... */ build_time_t gentime = circuit_build_times_generate_sample(cbt, - BUILDTIMEOUT_QUANTILE_CUTOFF, 1.0); + quantile_cutoff, 1.0); if (gentime < (build_time_t)get_options()->CircuitBuildTimeout*1000) { log_warn(LD_CIRC, @@ -413,13 +421,14 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) { /* Store a timeout as a random position on this curve. */ if (cbt->pre_timeouts) { + double timeout_quantile = 1.0- + ((double)cbt->pre_timeouts)/cbt->total_build_times; cbt->Xm = circuit_build_times_min(cbt); // Use current timeout to get an estimate on alpha - circuit_build_times_initial_alpha(cbt, - 1.0-((double)cbt->pre_timeouts)/cbt->total_build_times, + circuit_build_times_initial_alpha(cbt, timeout_quantile, get_options()->CircuitBuildTimeout*1000); while (cbt->pre_timeouts-- != 0) { - circuit_build_times_add_timeout_worker(cbt); + circuit_build_times_add_timeout_worker(cbt, timeout_quantile); } cbt->pre_timeouts = 0; } @@ -454,8 +463,11 @@ int circuit_build_times_is_network_live(circuit_build_times_t *cbt) { time_t now = approx_time(); - if (now - cbt->network_last_live > NETWORK_LIVE_INTERVAL) + if (now - cbt->network_last_live > NETWORK_LIVE_INTERVAL) { + log_info(LD_CIRC, "Network is no longer live. Dead for %ld seconds.", + now - cbt->network_last_live); return 0; + } return 1; } @@ -558,7 +570,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) } circuit_build_times_count_pretimeouts(cbt); - circuit_build_times_add_timeout_worker(cbt); + circuit_build_times_add_timeout_worker(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); } void diff --git a/src/or/or.h b/src/or/or.h index 0ab382fbdd..66db091e5e 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2877,8 +2877,8 @@ typedef uint32_t build_time_t; /* How often in seconds should we build a test circuit */ #define BUILD_TIMES_TEST_FREQUENCY 60 -/* Save state every 25 circuits */ -#define BUILD_TIMES_SAVE_STATE_EVERY 25 +/* Save state every 5 circuits */ +#define BUILD_TIMES_SAVE_STATE_EVERY 5 typedef struct { build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; @@ -2916,7 +2916,8 @@ void circuit_build_times_initial_alpha(circuit_build_times_t *cbt, void circuit_build_times_update_alpha(circuit_build_times_t *cbt); double circuit_build_times_cdf(circuit_build_times_t *cbt, double x); int circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt); -void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt); +void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, + double quantile_cutoff); #endif /********************************* circuitlist.c ***********************/ diff --git a/src/or/test.c b/src/or/test.c index c6cd6a8a45..a7f56fe4d1 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3484,9 +3484,11 @@ test_circuit_timeout(void) test_assert(!circuit_build_times_check_too_many_timeouts(&final)); for (i = 0; i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS; i++) { - circuit_build_times_add_timeout_worker(&estimate); + circuit_build_times_add_timeout_worker(&estimate, + BUILDTIMEOUT_QUANTILE_CUTOFF); if (i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS-1) { - circuit_build_times_add_timeout_worker(&final); + circuit_build_times_add_timeout_worker(&final, + BUILDTIMEOUT_QUANTILE_CUTOFF); } } -- cgit v1.2.3-54-g00ecf From fd412549fdd6631c12d1e6d9092a9506f60d9789 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Sep 2009 20:13:52 -0700 Subject: Update proposal to bring it more in-line with implementation. --- .../proposals/151-path-selection-improvements.txt | 80 +++++++++++----------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/doc/spec/proposals/151-path-selection-improvements.txt b/doc/spec/proposals/151-path-selection-improvements.txt index 3d5f07d3ab..df19e0f71f 100644 --- a/doc/spec/proposals/151-path-selection-improvements.txt +++ b/doc/spec/proposals/151-path-selection-improvements.txt @@ -2,7 +2,7 @@ Filename: 151-path-selection-improvements.txt Title: Improving Tor Path Selection Author: Fallon Chen, Mike Perry Created: 5-Jul-2008 -Status: Draft +Status: Implemented Overview @@ -22,51 +22,37 @@ Implementation Storing Build Times - Circuit build times will be stored in the circular array - 'circuit_build_times' consisting of uint16_t elements as milliseconds. - The total size of this array will be based on the number of circuits + Circuit build times are stored in the circular array + 'circuit_build_times' consisting of uint32_t elements as milliseconds. + The total size of this array is based on the number of circuits it takes to converge on a good fit of the long term distribution of the circuit builds for a fixed link. We do not want this value to be too large, because it will make it difficult for clients to adapt to moving between different links. - From our initial observations, this value appears to be on the order - of 1000, but will be configurable in a #define NCIRCUITS_TO_OBSERVE. - The exact value for this #define will be determined by performing - goodness of fit tests using measurments obtained from the shufflebt.py - script from TorFlow. + From our observations, this value appears to be on the order of 1000, + but is configurable in a #define NCIRCUITS_TO_OBSERVE. Long Term Storage - The long-term storage representation will be implemented by storing a + The long-term storage representation is implemented by storing a histogram with BUILDTIME_BIN_WIDTH millisecond buckets (default 50) when - writing out the statistics to disk. The format of this histogram on disk - is yet to be finalized, but it will likely be of the format - 'CircuitBuildTime ', with the total specified as - 'TotalBuildTimes ' + writing out the statistics to disk. The format this takes in the + state file is 'CircuitBuildTime ', with the total + specified as 'TotalBuildTimes ' Example: TotalBuildTimes 100 - CircuitBuildTimeBin 1 50 - CircuitBuildTimeBin 2 25 - CircuitBuildTimeBin 3 13 + CircuitBuildTimeBin 0 50 + CircuitBuildTimeBin 50 25 + CircuitBuildTimeBin 100 13 ... - Reading the histogram in will entail multiplying each bin by the - BUILDTIME_BIN_WIDTH and then inserting values into the - circuit_build_times array each with the value of - *BUILDTIME_BIN_WIDTH. In order to evenly distribute the - values in the circular array, a form of index skipping must - be employed. Values from bin #N with bin count C and total T - will occupy indexes specified by N+((T/C)*k)-1, where k is the - set of integers ranging from 0 to C-1. - - For example, this would mean that the values from bin 1 would - occupy indexes 1+(100/50)*k-1, or 0, 2, 4, 6, 8, 10 and so on. - The values for bin 2 would occupy positions 1, 5, 9, 13. Collisions - will be inserted at the first empty position in the array greater - than the selected index (which may requiring looping around the - array back to index 0). + Reading the histogram in will entail inserting values + into the circuit_build_times array each with the value of + milliseconds. In order to evenly distribute the values + in the circular array, the Fisher-Yates shuffle will be performed + after reading values from the bins. Learning the CircuitBuildTimeout @@ -77,14 +63,28 @@ Implementation fitting the data using the estimators at http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation. - The timeout itself will be calculated by solving the CDF for the - a percentile cutoff BUILDTIME_PERCENT_CUTOFF. This value - represents the percentage of paths the Tor client will accept out of - the total number of paths. We have not yet determined a good - cutoff for this mathematically, but 85% seems a good choice for now. + The timeout itself is calculated by using the Quartile function (the + inverted CDF) to give us the value on the CDF such that + BUILDTIME_PERCENT_CUTOFF (80%) of the mass of the distribution is + below the timeout value. - From http://en.wikipedia.org/wiki/Pareto_distribution#Definition, - the calculation we need is pow(BUILDTIME_PERCENT_CUTOFF/100.0, k)/Xm. + Thus, we expect that the Tor client will accept the fastest 80% of + the total number of paths on the network. + + Detecting Changing Network Conditions + + We attempt to detect both network connectivty loss and drastic + changes in the timeout characteristics. Network connectivity loss + is detected by recording a timestamp every time Tor either completes + a TLS connection or receives a cell. If this timestamp is more than + 90 seconds in the past, circuit timeouts are no longer counted. + + If more than MAX_RECENT_TIMEOUT_RATE (80%) of the past + RECENT_CIRCUITS (20) time out, we assume the network connection + has changed, and we discard all buildtimes history and compute + a new timeout by estimating a new Pareto curve using the + position on the Pareto Quartile function for the ratio of + timeouts. Testing @@ -104,7 +104,7 @@ Implementation of a new circuit, and the hard cutoff triggers destruction of the circuit. - Good values for hard and soft cutoffs seem to be 85% and 65% + Good values for hard and soft cutoffs seem to be 80% and 60% respectively, but we should eventually justify this with observation. When to Begin Calculation -- cgit v1.2.3-54-g00ecf From 6eba08e22f2b0ab91433d6b641eab45a65a4495d Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Sep 2009 20:27:43 -0700 Subject: Use our variable directly for timeout. Using CircuitBuildTimeout is prone to issues with SIGHUP, etc. Also, shuffle the circuit build times array after loading it in so that newer measurements don't replace chunks of similarly timed measurements. --- src/or/circuitbuild.c | 144 +++++++++++++++++++++++++++++++++----------------- src/or/circuituse.c | 4 +- src/or/config.c | 6 +-- src/or/or.h | 12 +++-- src/or/test.c | 16 ++++-- 5 files changed, 121 insertions(+), 61 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index d3f95254c6..b1c377a82e 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -86,6 +86,8 @@ static smartlist_t *entry_guards = NULL; * and those changes need to be flushed to disk. */ static int entry_guards_dirty = 0; +static int unit_tests = 0; + /********* END VARIABLES ************/ static int circuit_deliver_create_cell(circuit_t *circ, @@ -99,6 +101,34 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); static time_t start_of_month(time_t when); +void +circuitbuild_running_unit_tests(void) +{ + unit_tests = 1; +} + +void +circuit_build_times_reset(circuit_build_times_t *cbt) +{ + memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times)); + cbt->pre_timeouts = 0; + cbt->total_build_times = 0; + cbt->build_times_idx = 0; + cbt->computed = 0; +} + +void +circuit_build_times_init(circuit_build_times_t *cbt) +{ + memset(cbt, 0, sizeof(*cbt)); + + if (!unit_tests && get_options()->CircuitBuildTimeout) { + cbt->timeout = get_options()->CircuitBuildTimeout; + } else { + cbt->timeout = BUILD_TIMEOUT_INITIAL_VALUE; + } +} + /** * circuit_build_times is a circular array, so loop around when * array is full @@ -124,7 +154,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) if ((cbt->total_build_times % BUILD_TIMES_SAVE_STATE_EVERY) == 0) { /* Save state every 100 circuit builds */ - if (!get_options()->AvoidDiskWrites) + if (!unit_tests && !get_options()->AvoidDiskWrites) or_state_mark_dirty(get_or_state(), 0); } @@ -205,15 +235,13 @@ circuit_build_times_mode(circuit_build_times_t *cbt) /** * output a histogram of current circuit build times. * - * XXX: Is do_unit the right way to do this unit test - * noise? */ void circuit_build_times_update_state(circuit_build_times_t *cbt, - or_state_t *state, int do_unit) + or_state_t *state) { uint32_t *histogram; - int i = 0; + build_time_t i = 0; build_time_t nbins = 0; config_line_t **next, *line; @@ -225,11 +253,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, state->TotalBuildTimes = cbt->total_build_times; - /* Write the bins in reverse so that on startup, the faster - times are at the end. This is needed because of - the checks in circuit_build_times_check_too_many_timeouts() - which check the end of the array for recent values */ - for (i = nbins-1; i >= 0; i--) { + for (i = 0; i < nbins; i++) { // compress the histogram by skipping the blanks if (histogram[i] == 0) continue; *next = line = tor_malloc_zero(sizeof(config_line_t)); @@ -240,7 +264,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, next = &(line->next); } - if (!do_unit) { + if (!unit_tests) { if (!get_options()->AvoidDiskWrites) or_state_mark_dirty(get_or_state(), 0); } @@ -248,15 +272,35 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, if (histogram) tor_free(histogram); } +/* Stolen from http://en.wikipedia.org/wiki/Fisher\u2013Yates_shuffle */ +static void +circuit_build_times_shuffle_array(circuit_build_times_t *cbt) +{ + int n = cbt->total_build_times; + + /* This code can only be run on a compact array */ + tor_assert(cbt->total_build_times == cbt->build_times_idx); + while (n-- > 1) { + int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */ + build_time_t tmp = cbt->circuit_build_times[k]; + cbt->circuit_build_times[k] = cbt->circuit_build_times[n]; + cbt->circuit_build_times[n] = tmp; + } +} + /** Load histogram from state */ int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg) { - int tot_values = 0, lines = 0; + int tot_values = 0, N = 0; config_line_t *line; + int i; msg = NULL; - memset(cbt, 0, sizeof(*cbt)); + circuit_build_times_init(cbt); + + /* We don't support decreasing the table size yet */ + tor_assert(state->TotalBuildTimes <= NCIRCUITS_TO_OBSERVE); for (line = state->BuildtimeHistogram; line; line = line->next) { smartlist_t * args = smartlist_create(); @@ -269,7 +313,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, } else { const char *ms_str = smartlist_get(args,0); const char *count_str = smartlist_get(args,1); - uint32_t count, i; + uint32_t count, k; build_time_t ms; int ok; ms = tor_parse_ulong(ms_str, 0, 0, BUILD_TIME_MAX, &ok, NULL); @@ -284,15 +328,27 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, "Unparsable bin count"); break; } - lines++; - for (i = 0; i < count; i++) { + + for (k = 0; k < count; k++) { circuit_build_times_add_time(cbt, ms); - tot_values++; } + N++; } } - log_info(LD_CIRC, "Loaded %d values from %d lines in circuit time histogram", - tot_values, lines); + + circuit_build_times_shuffle_array(cbt); + + /* Verify that we didn't overwrite any indexes */ + for (i=0; i < NCIRCUITS_TO_OBSERVE; i++) { + if (!cbt->circuit_build_times[i]) + break; + tot_values++; + } + log_info(LD_CIRC, + "Loaded %d/%d values from %d lines in circuit time histogram", + tot_values, cbt->total_build_times, N); + tor_assert(cbt->total_build_times == state->TotalBuildTimes); + tor_assert(tot_values == cbt->total_build_times); circuit_build_times_set_timeout(cbt); return (msg ? -1 : 0); } @@ -368,8 +424,8 @@ build_time_t circuit_build_times_generate_sample(circuit_build_times_t *cbt, double q_lo, double q_hi) { - uint32_t r = crypto_rand_int(UINT32_MAX-1); - double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT32_MAX); + uint64_t r = crypto_rand_uint64(UINT64_MAX-1); + double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT64_MAX); build_time_t ret; tor_assert(0 <= u && u < 1.0); @@ -386,12 +442,11 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, build_time_t gentime = circuit_build_times_generate_sample(cbt, quantile_cutoff, 1.0); - if (gentime < (build_time_t)get_options()->CircuitBuildTimeout*1000) { + if (gentime < (build_time_t)cbt->timeout*1000) { log_warn(LD_CIRC, "Generated a synthetic timeout LESS than the current timeout: %u vs %d", - gentime, get_options()->CircuitBuildTimeout*1000); - tor_assert(gentime >= - (build_time_t)get_options()->CircuitBuildTimeout*1000); + gentime, cbt->timeout*1000); + tor_assert(gentime >= (build_time_t)cbt->timeout*1000); } else if (gentime > BUILD_TIME_MAX) { gentime = BUILD_TIME_MAX; log_info(LD_CIRC, @@ -426,7 +481,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) cbt->Xm = circuit_build_times_min(cbt); // Use current timeout to get an estimate on alpha circuit_build_times_initial_alpha(cbt, timeout_quantile, - get_options()->CircuitBuildTimeout*1000); + cbt->timeout*1000); while (cbt->pre_timeouts-- != 0) { circuit_build_times_add_timeout_worker(cbt, timeout_quantile); } @@ -487,11 +542,10 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) for (i = (cbt->build_times_idx - RECENT_CIRCUITS) % NCIRCUITS_TO_OBSERVE; i != cbt->build_times_idx; i = (i + 1) % NCIRCUITS_TO_OBSERVE) { - if (cbt->circuit_build_times[i] < Xm) { + if (cbt->circuit_build_times[i] && cbt->circuit_build_times[i] < Xm) { Xm = cbt->circuit_build_times[i]; } - if (cbt->circuit_build_times[i] > - (build_time_t)get_options()->CircuitBuildTimeout*1000) { + if (cbt->circuit_build_times[i] > (build_time_t)cbt->timeout*1000) { timeout_rate++; } } @@ -507,42 +561,36 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) "Network connection type appears to have changed. " "Resetting timeouts."); - if (Xm >= (build_time_t)get_options()->CircuitBuildTimeout*1000) { + if (Xm >= (build_time_t)cbt->timeout*1000) { Xm = circuit_build_times_min(cbt); - if (Xm >= (build_time_t)get_options()->CircuitBuildTimeout*1000) { + if (Xm >= (build_time_t)cbt->timeout*1000) { /* No circuits have completed */ - get_options()->CircuitBuildTimeout *= 2; + cbt->timeout *= 2; log_warn(LD_CIRC, "Adjusting CircuitBuildTimeout to %d in the hopes that " - "some connections will succeed", - get_options()->CircuitBuildTimeout); + "some connections will succeed", cbt->timeout); goto reset; } } cbt->Xm = Xm; circuit_build_times_initial_alpha(cbt, 1.0-timeout_rate, - get_options()->CircuitBuildTimeout*1000.0); + cbt->timeout*1000.0); timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); - get_options()->CircuitBuildTimeout = lround(timeout/1000.0); + cbt->timeout = lround(timeout/1000.0); log_notice(LD_CIRC, "Reset circuit build timeout to %d (Xm: %d a: %lf) based on " - "%d recent circuit times", - get_options()->CircuitBuildTimeout, cbt->Xm, cbt->alpha, + "%d recent circuit times", cbt->timeout, cbt->Xm, cbt->alpha, RECENT_CIRCUITS); reset: - /* Reset all data. Do we need a constructor? */ - memset(cbt->circuit_build_times, 0, sizeof(cbt->circuit_build_times)); - cbt->pre_timeouts = 0; - cbt->total_build_times = 0; - cbt->build_times_idx = 0; - cbt->estimated = 0; + /* Reset all data */ + circuit_build_times_reset(cbt); return 1; } @@ -563,7 +611,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) return; } - if (!cbt->estimated) { + if (!cbt->computed) { /* Store a timeout before we have enough data as special */ cbt->pre_timeouts++; return; @@ -591,13 +639,13 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); - cbt->estimated = 1; - get_options()->CircuitBuildTimeout = lround(timeout/1000.0); + cbt->computed = 1; + cbt->timeout = lround(timeout/1000.0); log_info(LD_CIRC, "Set circuit build timeout to %d (Xm: %d a: %lf) based on " "%d circuit times", - get_options()->CircuitBuildTimeout, cbt->Xm, cbt->alpha, + cbt->timeout, cbt->Xm, cbt->alpha, cbt->total_build_times); } diff --git a/src/or/circuituse.c b/src/or/circuituse.c index d53cb198a2..4162790ec3 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -264,8 +264,8 @@ void circuit_expire_building(time_t now) { circuit_t *victim, *circ = global_circuitlist; - time_t general_cutoff = now - get_options()->CircuitBuildTimeout; - time_t begindir_cutoff = now - get_options()->CircuitBuildTimeout/2; + time_t general_cutoff = now - circ_times.timeout; + time_t begindir_cutoff = now - circ_times.timeout/2; time_t introcirc_cutoff = begindir_cutoff; cpath_build_state_t *build_state; diff --git a/src/or/config.c b/src/or/config.c index 45ce5cf13b..1505dc3e4e 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -164,7 +164,7 @@ static config_var_t _option_vars[] = { V(BridgeRecordUsageByCountry, BOOL, "1"), V(BridgeRelay, BOOL, "0"), V(CellStatistics, BOOL, "0"), - V(CircuitBuildTimeout, INTERVAL, "1 minute"), + V(CircuitBuildTimeout, INTERVAL, "0"), V(CircuitIdleTimeout, INTERVAL, "1 hour"), V(ClientDNSRejectInternalAddresses, BOOL,"1"), V(ClientOnly, BOOL, "0"), @@ -2922,7 +2922,7 @@ compute_publishserverdescriptor(or_options_t *options) /** Lowest allowable value for CircuitBuildTimeout; values too low will * increase network load because of failing connections being retried, and * might prevent users from connecting to the network at all. */ -#define MIN_CIRCUIT_BUILD_TIMEOUT 5 +#define MIN_CIRCUIT_BUILD_TIMEOUT 3 /** Lowest allowable value for MaxCircuitDirtiness; if this is too low, Tor * will generate too many circuits and potentially overload the network. */ @@ -5207,7 +5207,7 @@ or_state_save(time_t now) * to avoid redundant writes. */ entry_guards_update_state(global_state); rep_hist_update_state(global_state); - circuit_build_times_update_state(&circ_times, global_state, 0); + circuit_build_times_update_state(&circ_times, global_state); if (accounting_is_enabled(get_options())) accounting_run_housekeeping(now); diff --git a/src/or/or.h b/src/or/or.h index 66db091e5e..6f4ad4516d 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2863,7 +2863,7 @@ void entry_guards_free_all(void); #define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */ #define BUILDTIME_BIN_WIDTH 50 -#define MAX_RECENT_TIMEOUT_RATE 0.80 +#define MAX_RECENT_TIMEOUT_RATE 0.7999999 /* TODO: This should be moved to the consensus */ #define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 @@ -2874,6 +2874,8 @@ typedef uint32_t build_time_t; /* Have we recieved a cell in the last 90 seconds? */ #define NETWORK_LIVE_INTERVAL 90 +#define BUILD_TIMEOUT_INITIAL_VALUE 60 + /* How often in seconds should we build a test circuit */ #define BUILD_TIMES_TEST_FREQUENCY 60 @@ -2889,12 +2891,13 @@ typedef struct { int pre_timeouts; build_time_t Xm; double alpha; - int estimated; + int computed; + int timeout; } circuit_build_times_t; extern circuit_build_times_t circ_times; void circuit_build_times_update_state(circuit_build_times_t *cbt, - or_state_t *state, int do_unit); + or_state_t *state); int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg); void circuit_build_times_add_timeout(circuit_build_times_t *cbt); @@ -2905,6 +2908,7 @@ void circuit_build_times_network_is_live(circuit_build_times_t *cbt); int circuit_build_times_is_network_live(circuit_build_times_t *cbt); int circuit_build_times_needs_circuits(circuit_build_times_t *cbt); int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt); +void circuit_build_times_init(circuit_build_times_t *cbt); #ifdef CIRCUIT_PRIVATE double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, @@ -2918,6 +2922,8 @@ double circuit_build_times_cdf(circuit_build_times_t *cbt, double x); int circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt); void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff); +void circuitbuild_running_unit_tests(void); +void circuit_build_times_reset(circuit_build_times_t *cbt); #endif /********************************* circuitlist.c ***********************/ diff --git a/src/or/test.c b/src/or/test.c index a7f56fe4d1..7228425241 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3429,11 +3429,13 @@ test_circuit_timeout(void) int i; char *msg; double timeout1, timeout2; - memset(&initial, 0, sizeof(circuit_build_times_t)); - memset(&estimate, 0, sizeof(circuit_build_times_t)); - memset(&final, 0, sizeof(circuit_build_times_t)); + circuit_build_times_init(&initial); + circuit_build_times_init(&estimate); + circuit_build_times_init(&final); + memset(&state, 0, sizeof(or_state_t)); + circuitbuild_running_unit_tests(); #define timeout0 (30*1000.0) initial.Xm = 750; circuit_build_times_initial_alpha(&initial, BUILDTIMEOUT_QUANTILE_CUTOFF, @@ -3449,6 +3451,7 @@ test_circuit_timeout(void) circuit_build_times_update_alpha(&estimate); timeout1 = circuit_build_times_calculate_timeout(&estimate, BUILDTIMEOUT_QUANTILE_CUTOFF); + circuit_build_times_set_timeout(&estimate); log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout1, estimate.Xm); /* XXX: 5% distribution error may not be the right metric */ } while (fabs(circuit_build_times_cdf(&initial, timeout0) - @@ -3458,12 +3461,14 @@ test_circuit_timeout(void) test_assert(estimate.total_build_times < NCIRCUITS_TO_OBSERVE); - circuit_build_times_update_state(&estimate, &state, 1); + circuit_build_times_update_state(&estimate, &state); test_assert(circuit_build_times_parse_state(&final, &state, &msg) == 0); circuit_build_times_update_alpha(&final); timeout2 = circuit_build_times_calculate_timeout(&final, BUILDTIMEOUT_QUANTILE_CUTOFF); + + circuit_build_times_set_timeout(&final); log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm); test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) - @@ -3480,6 +3485,7 @@ test_circuit_timeout(void) circuit_build_times_generate_sample(&final, 0, BUILDTIMEOUT_QUANTILE_CUTOFF)); } + test_assert(!circuit_build_times_check_too_many_timeouts(&estimate)); test_assert(!circuit_build_times_check_too_many_timeouts(&final)); @@ -3492,7 +3498,7 @@ test_circuit_timeout(void) } } - test_assert(circuit_build_times_check_too_many_timeouts(&estimate)); + test_assert(circuit_build_times_check_too_many_timeouts(&estimate) == 1); test_assert(!circuit_build_times_check_too_many_timeouts(&final)); done: -- cgit v1.2.3-54-g00ecf From 8210336182b9f78fc5ca616280c2ce8e78cf6620 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 1 Sep 2009 21:12:47 -0700 Subject: More detail for some log msgs. --- src/or/circuitbuild.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index b1c377a82e..0f2972fe6c 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -583,9 +583,9 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) cbt->timeout = lround(timeout/1000.0); log_notice(LD_CIRC, - "Reset circuit build timeout to %d (Xm: %d a: %lf) based on " - "%d recent circuit times", cbt->timeout, cbt->Xm, cbt->alpha, - RECENT_CIRCUITS); + "Reset circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " + "%d recent circuit times", cbt->timeout, timeout, cbt->Xm, + cbt->alpha, RECENT_CIRCUITS); reset: @@ -643,9 +643,8 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->timeout = lround(timeout/1000.0); log_info(LD_CIRC, - "Set circuit build timeout to %d (Xm: %d a: %lf) based on " - "%d circuit times", - cbt->timeout, cbt->Xm, cbt->alpha, + "Set circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " + "%d circuit times", cbt->timeout, timeout, cbt->Xm, cbt->alpha, cbt->total_build_times); } -- cgit v1.2.3-54-g00ecf From 535423a3bb87fcdafc4f25f8a7e3898b127ff77c Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 2 Sep 2009 15:29:34 -0700 Subject: Resolve mode ties in favor of the higher (slower) mode. --- src/or/circuitbuild.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 0f2972fe6c..0a55d8f86a 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -224,7 +224,7 @@ circuit_build_times_mode(circuit_build_times_t *cbt) uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins); for (i = 0; i < nbins; i++) { - if (histogram[i] > histogram[max_bin]) { + if (histogram[i] >= histogram[max_bin]) { max_bin = i; } } -- cgit v1.2.3-54-g00ecf From b508e4748f42436ad1e9b05970cc4d3c5c1debfc Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Thu, 3 Sep 2009 14:44:01 +0200 Subject: Remove trailing spaces. As if bytes were free... Also correct some typos. --- .../proposals/151-path-selection-improvements.txt | 52 +++++++++++----------- src/or/circuitbuild.c | 9 ++-- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/doc/spec/proposals/151-path-selection-improvements.txt b/doc/spec/proposals/151-path-selection-improvements.txt index df19e0f71f..7821a5dddb 100644 --- a/doc/spec/proposals/151-path-selection-improvements.txt +++ b/doc/spec/proposals/151-path-selection-improvements.txt @@ -8,7 +8,7 @@ Overview The performance of paths selected can be improved by adjusting the CircuitBuildTimeout and avoiding failing guard nodes. This proposal - describes a method of tracking buildtime statistics at the client, and + describes a method of tracking buildtime statistics at the client, and using those statistics to adjust the CircuitBuildTimeout. Motivation @@ -30,15 +30,15 @@ Implementation too large, because it will make it difficult for clients to adapt to moving between different links. - From our observations, this value appears to be on the order of 1000, + From our observations, this value appears to be on the order of 1000, but is configurable in a #define NCIRCUITS_TO_OBSERVE. - + Long Term Storage - The long-term storage representation is implemented by storing a - histogram with BUILDTIME_BIN_WIDTH millisecond buckets (default 50) when + The long-term storage representation is implemented by storing a + histogram with BUILDTIME_BIN_WIDTH millisecond buckets (default 50) when writing out the statistics to disk. The format this takes in the - state file is 'CircuitBuildTime ', with the total + state file is 'CircuitBuildTime ', with the total specified as 'TotalBuildTimes ' Example: @@ -57,7 +57,7 @@ Implementation Learning the CircuitBuildTimeout Based on studies of build times, we found that the distribution of - circuit buildtimes appears to be a Pareto distribution. + circuit buildtimes appears to be a Pareto distribution. We will calculate the parameters for a Pareto distribution fitting the data using the estimators at @@ -68,7 +68,7 @@ Implementation BUILDTIME_PERCENT_CUTOFF (80%) of the mass of the distribution is below the timeout value. - Thus, we expect that the Tor client will accept the fastest 80% of + Thus, we expect that the Tor client will accept the fastest 80% of the total number of paths on the network. Detecting Changing Network Conditions @@ -76,65 +76,65 @@ Implementation We attempt to detect both network connectivty loss and drastic changes in the timeout characteristics. Network connectivity loss is detected by recording a timestamp every time Tor either completes - a TLS connection or receives a cell. If this timestamp is more than + a TLS connection or receives a cell. If this timestamp is more than 90 seconds in the past, circuit timeouts are no longer counted. - If more than MAX_RECENT_TIMEOUT_RATE (80%) of the past + If more than MAX_RECENT_TIMEOUT_RATE (80%) of the past RECENT_CIRCUITS (20) time out, we assume the network connection has changed, and we discard all buildtimes history and compute a new timeout by estimating a new Pareto curve using the position on the Pareto Quartile function for the ratio of - timeouts. + timeouts. Testing After circuit build times, storage, and learning are implemented, the resulting histogram should be checked for consistency by - verifying it persists across successive Tor invocations where + verifying it persists across successive Tor invocations where no circuits are built. In addition, we can also use the existing - buildtime scripts to record build times, and verify that the histogram + buildtime scripts to record build times, and verify that the histogram the python produces matches that which is output to the state file in Tor, and verify that the Pareto parameters and cutoff points also match. - + Soft timeout vs Hard Timeout - - At some point, it may be desirable to change the cutoff from a + + At some point, it may be desirable to change the cutoff from a single hard cutoff that destroys the circuit to a soft cutoff and a hard cutoff, where the soft cutoff merely triggers the building - of a new circuit, and the hard cutoff triggers destruction of the + of a new circuit, and the hard cutoff triggers destruction of the circuit. - Good values for hard and soft cutoffs seem to be 80% and 60% + Good values for hard and soft cutoffs seem to be 80% and 60% respectively, but we should eventually justify this with observation. When to Begin Calculation - The number of circuits to observe (NCIRCUITS_TO_CUTOFF) before - changing the CircuitBuildTimeout will be tunable via a #define. From - our measurements, a good value for NCIRCUITS_TO_CUTOFF appears to be + The number of circuits to observe (NCIRCUITS_TO_CUTOFF) before + changing the CircuitBuildTimeout will be tunable via a #define. From + our measurements, a good value for NCIRCUITS_TO_CUTOFF appears to be on the order of 100. Dealing with Timeouts - Timeouts should be counted as the expectation of the region of + Timeouts should be counted as the expectation of the region of of the Pareto distribution beyond the cutoff. The proposal will be updated with this value soon. - Also, in the event of network failure, the observation mechanism + Also, in the event of network failure, the observation mechanism should stop collecting timeout data. Client Hints Some research still needs to be done to provide initial values for CircuitBuildTimeout based on values learned from modem - users, DSL users, Cable Modem users, and dedicated links. A + users, DSL users, Cable Modem users, and dedicated links. A radiobutton in Vidalia should eventually be provided that - sets CircuitBuildTimeout to one of these values and also + sets CircuitBuildTimeout to one of these values and also provide the option of purging all learned data, should any exist. These values can either be published in the directory, or shipped hardcoded for a particular Tor version. - + Issues Impact on anonymity diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 0a55d8f86a..39e5a43f5e 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -308,7 +308,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0); if (smartlist_len(args) < 2) { *msg = tor_strdup("Unable to parse circuit build times: " - "Too few arguments to CircuitBuildTIme"); + "Too few arguments to CircuitBuildTime"); break; } else { const char *ms_str = smartlist_get(args,0); @@ -375,7 +375,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) } if (n!=cbt->total_build_times) { - log_err(LD_CIRC, "Discrepency in build times count: %d vs %d", n, + log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n, cbt->total_build_times); } tor_assert(n==cbt->total_build_times); @@ -396,7 +396,8 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) * * See http://en.wikipedia.org/wiki/Quantile_function, * http://en.wikipedia.org/wiki/Inverse_transform_sampling and - * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_random_sample_from_Pareto_distribution + * http://en.wikipedia.org/wiki/Pareto_distribution#Generating_a_ + * random_sample_from_Pareto_distribution * That's right. I'll cite wikipedia all day long. */ double @@ -551,7 +552,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) } timeout_rate /= RECENT_CIRCUITS; - /* If more then 80% of our recent circuits are timing out, + /* If more than 80% of our recent circuits are timing out, * we need to re-estimate a new initial alpha and timeout */ if (timeout_rate < MAX_RECENT_TIMEOUT_RATE) { return 0; -- cgit v1.2.3-54-g00ecf From 4b3bc714a3f5253470e1feab80bfd30744d34d44 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 4 Sep 2009 13:42:58 -0700 Subject: Woops. Fix a couple memory leaks. Also change the max timeout quantile to 0.98, so we can avoid huge synthetic timeout values. --- src/or/circuitbuild.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 39e5a43f5e..3fa446faac 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -229,6 +229,8 @@ circuit_build_times_mode(circuit_build_times_t *cbt) } } + tor_free(histogram); + return max_bin*BUILDTIME_BIN_WIDTH; } @@ -309,6 +311,8 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, if (smartlist_len(args) < 2) { *msg = tor_strdup("Unable to parse circuit build times: " "Too few arguments to CircuitBuildTime"); + SMARTLIST_FOREACH(args, char*, cp, tor_free(cp)); + smartlist_free(args); break; } else { const char *ms_str = smartlist_get(args,0); @@ -333,7 +337,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, circuit_build_times_add_time(cbt, ms); } N++; + SMARTLIST_FOREACH(args, char*, cp, tor_free(cp)); + smartlist_free(args); } + } circuit_build_times_shuffle_array(cbt); @@ -439,9 +446,11 @@ void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff) { - /* Generate 0.8-1.0... */ + /* Generate points in [cutoff, 1.0) on the CDF... We want to + * stay a bit short of 1.0 though, because longtail is + * loooooooooooooooooooooooooooooooooooooooooooooooooooong */ build_time_t gentime = circuit_build_times_generate_sample(cbt, - quantile_cutoff, 1.0); + quantile_cutoff, 0.98); if (gentime < (build_time_t)cbt->timeout*1000) { log_warn(LD_CIRC, -- cgit v1.2.3-54-g00ecf From 672e2f6908fda52897c9564b756de743692d1d55 Mon Sep 17 00:00:00 2001 From: Roger Dingledine Date: Sun, 6 Sep 2009 23:14:13 -0400 Subject: space/indent cleanups, plus point out three bugs --- src/or/circuitbuild.c | 63 ++++++++++++++++++++++++++++++--------------------- src/or/circuituse.c | 2 ++ src/or/config.c | 11 ++++----- src/or/or.h | 6 ++--- src/or/test.c | 6 ++--- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 3fa446faac..50faac1702 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -20,12 +20,13 @@ * and random segfaults due to memory corruption (and * not even at calls to log() either!) */ + /* XXX022 somebody should rename Tor's log() function, so we can + * remove this wart. -RD */ #undef log /* - * Linux doesn't provide lround in math.h by default, - * but mac os does... Its best just to leave math.h - * out of the picture entirely. + * Linux doesn't provide lround in math.h by default, but mac os does... + * It's best just to leave math.h out of the picture entirely. */ //#define log math_h_log //#include @@ -86,6 +87,8 @@ static smartlist_t *entry_guards = NULL; * and those changes need to be flushed to disk. */ static int entry_guards_dirty = 0; +/** If set, we're running the unit tests: we should avoid clobbering + * our state file. */ static int unit_tests = 0; /********* END VARIABLES ************/ @@ -101,12 +104,15 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice); static void entry_guards_changed(void); static time_t start_of_month(time_t when); +/** Make a note that we're running unit tests (rather than running Tor + * itself), so we avoid clobbering our state file. */ void circuitbuild_running_unit_tests(void) { unit_tests = 1; } +/** DOCDOC */ void circuit_build_times_reset(circuit_build_times_t *cbt) { @@ -117,6 +123,7 @@ circuit_build_times_reset(circuit_build_times_t *cbt) cbt->computed = 0; } +/** DOCDOC */ void circuit_build_times_init(circuit_build_times_t *cbt) { @@ -176,6 +183,7 @@ circuit_build_times_max(circuit_build_times_t *cbt) return max_build_time; } +/** DOCDOC */ static build_time_t circuit_build_times_min(circuit_build_times_t *cbt) { @@ -183,7 +191,7 @@ circuit_build_times_min(circuit_build_times_t *cbt) build_time_t min_build_time = BUILD_TIME_MAX; for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) { if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */ - cbt->circuit_build_times[i] < min_build_time) + cbt->circuit_build_times[i] < min_build_time) min_build_time = cbt->circuit_build_times[i]; } if (min_build_time == BUILD_TIME_MAX) { @@ -217,6 +225,7 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, return histogram; } +/** DOCDOC */ static build_time_t circuit_build_times_mode(circuit_build_times_t *cbt) { @@ -236,7 +245,6 @@ circuit_build_times_mode(circuit_build_times_t *cbt) /** * output a histogram of current circuit build times. - * */ void circuit_build_times_update_state(circuit_build_times_t *cbt, @@ -283,14 +291,14 @@ circuit_build_times_shuffle_array(circuit_build_times_t *cbt) /* This code can only be run on a compact array */ tor_assert(cbt->total_build_times == cbt->build_times_idx); while (n-- > 1) { - int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */ + int k = crypto_rand_int(n + 1); /* 0 <= k <= n. */ build_time_t tmp = cbt->circuit_build_times[k]; cbt->circuit_build_times[k] = cbt->circuit_build_times[n]; cbt->circuit_build_times[n] = tmp; } } -/** Load histogram from state */ +/** Load histogram from state. DOCDOC what else? */ int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg) @@ -298,16 +306,17 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0, N = 0; config_line_t *line; int i; - msg = NULL; + msg = NULL; /* XXX is this a bug? should be *msg, or we'll seg fault + * if we try to set it */ circuit_build_times_init(cbt); /* We don't support decreasing the table size yet */ tor_assert(state->TotalBuildTimes <= NCIRCUITS_TO_OBSERVE); for (line = state->BuildtimeHistogram; line; line = line->next) { - smartlist_t * args = smartlist_create(); + smartlist_t *args = smartlist_create(); smartlist_split_string(args, line->value, " ", - SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0); + SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0); if (smartlist_len(args) < 2) { *msg = tor_strdup("Unable to parse circuit build times: " "Too few arguments to CircuitBuildTime"); @@ -357,9 +366,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, tor_assert(cbt->total_build_times == state->TotalBuildTimes); tor_assert(tot_values == cbt->total_build_times); circuit_build_times_set_timeout(cbt); - return (msg ? -1 : 0); + return msg ? -1 : 0; } +/** DOCDOC */ void circuit_build_times_update_alpha(circuit_build_times_t *cbt) { @@ -454,13 +464,14 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, if (gentime < (build_time_t)cbt->timeout*1000) { log_warn(LD_CIRC, - "Generated a synthetic timeout LESS than the current timeout: %u vs %d", - gentime, cbt->timeout*1000); + "Generated a synthetic timeout LESS than the current timeout: " + "%u vs %d", gentime, cbt->timeout*1000); tor_assert(gentime >= (build_time_t)cbt->timeout*1000); } else if (gentime > BUILD_TIME_MAX) { gentime = BUILD_TIME_MAX; log_info(LD_CIRC, - "Generated a synthetic timeout larger than the max: %u", gentime); + "Generated a synthetic timeout larger than the max: %u", + gentime); } else { log_info(LD_CIRC, "Generated synthetic time %u for timeout", gentime); } @@ -491,7 +502,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) cbt->Xm = circuit_build_times_min(cbt); // Use current timeout to get an estimate on alpha circuit_build_times_initial_alpha(cbt, timeout_quantile, - cbt->timeout*1000); + cbt->timeout*1000); while (cbt->pre_timeouts-- != 0) { circuit_build_times_add_timeout_worker(cbt, timeout_quantile); } @@ -515,7 +526,7 @@ int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt) { return circuit_build_times_needs_circuits(cbt) && - approx_time()-cbt->last_circ_at > BUILD_TIMES_TEST_FREQUENCY; + approx_time()-cbt->last_circ_at > BUILD_TIMES_TEST_FREQUENCY; } void @@ -568,7 +579,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) } log_notice(LD_CIRC, - "Network connection type appears to have changed. " + "Network connection speed appears to have changed. " "Resetting timeouts."); if (Xm >= (build_time_t)cbt->timeout*1000) { @@ -577,8 +588,8 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) /* No circuits have completed */ cbt->timeout *= 2; log_warn(LD_CIRC, - "Adjusting CircuitBuildTimeout to %d in the hopes that " - "some connections will succeed", cbt->timeout); + "Adjusting CircuitBuildTimeout to %d in the hopes that " + "some connections will succeed", cbt->timeout); goto reset; } } @@ -593,9 +604,9 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) cbt->timeout = lround(timeout/1000.0); log_notice(LD_CIRC, - "Reset circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " - "%d recent circuit times", cbt->timeout, timeout, cbt->Xm, - cbt->alpha, RECENT_CIRCUITS); + "Reset circuit build timeout to %d (%lf, Xm: %d, a: %lf) based " + "on %d recent circuit times", cbt->timeout, timeout, cbt->Xm, + cbt->alpha, RECENT_CIRCUITS); reset: @@ -638,9 +649,9 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) { log_info(LD_CIRC, - "Not enough circuits yet to calculate a new build timeout." - " Need %d more.", - MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times); + "Not enough circuits yet to calculate a new build timeout." + " Need %d more.", + MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times); return; } @@ -653,7 +664,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->timeout = lround(timeout/1000.0); log_info(LD_CIRC, - "Set circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " + "Set circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " "%d circuit times", cbt->timeout, timeout, cbt->Xm, cbt->alpha, cbt->total_build_times); diff --git a/src/or/circuituse.c b/src/or/circuituse.c index 4162790ec3..f6a41665d1 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -861,6 +861,7 @@ circuit_build_failed(origin_circuit_t *circ) circuit_increment_failure_count(); /* Don't increment failure count, since Bob may have picked * the introduction point maliciously */ + /* XXX Mike, you didn't read my comment above! :) -RD */ /* Alice will pick a new intro point when this one dies, if * the stream in question still cares. No need to act here. */ break; @@ -875,6 +876,7 @@ circuit_build_failed(origin_circuit_t *circ) circuit_increment_failure_count(); /* Don't increment failure count, since Alice may have picked * the rendezvous point maliciously */ + /* XXX Mike, you didn't read my comment above! :) -RD */ log_info(LD_REND, "Couldn't connect to Alice's chosen rend point %s " "(%s hop failed).", diff --git a/src/or/config.c b/src/or/config.c index 1505dc3e4e..cd222595b5 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -409,9 +409,9 @@ static config_var_t _state_vars[] = { V(LastRotatedOnionKey, ISOTIME, NULL), V(LastWritten, ISOTIME, NULL), - VAR("TotalBuildTimes", UINT, TotalBuildTimes, NULL), - VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), - VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), + V("TotalBuildTimes", UINT, NULL), + VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), + VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), { NULL, CONFIG_TYPE_OBSOLETE, 0, NULL } }; @@ -5068,13 +5068,10 @@ or_state_set(or_state_t *new_state) log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err); tor_free(err); } - - if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) { + if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) { log_warn(LD_GENERAL,"%s",err); tor_free(err); - } - } /** Reload the persistent state from disk, generating a new state as needed. diff --git a/src/or/or.h b/src/or/or.h index 6f4ad4516d..5462784503 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2871,7 +2871,7 @@ void entry_guards_free_all(void); typedef uint32_t build_time_t; #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) -/* Have we recieved a cell in the last 90 seconds? */ +/* Have we received a cell in the last 90 seconds? */ #define NETWORK_LIVE_INTERVAL 90 #define BUILD_TIMEOUT_INITIAL_VALUE 60 @@ -2898,8 +2898,8 @@ typedef struct { extern circuit_build_times_t circ_times; void circuit_build_times_update_state(circuit_build_times_t *cbt, or_state_t *state); -int circuit_build_times_parse_state(circuit_build_times_t *cbt, - or_state_t *state, char **msg); +int circuit_build_times_parse_state(circuit_build_times_t *cbt, + or_state_t *state, char **msg); void circuit_build_times_add_timeout(circuit_build_times_t *cbt); void circuit_build_times_set_timeout(circuit_build_times_t *cbt); int circuit_build_times_add_time(circuit_build_times_t *cbt, diff --git a/src/or/test.c b/src/or/test.c index 7228425241..06bfccece6 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3472,7 +3472,7 @@ test_circuit_timeout(void) log_warn(LD_CIRC, "Timeout is %lf, Xm is %d", timeout2, final.Xm); test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) - - circuit_build_times_cdf(&initial, timeout2)) < 0.05); + circuit_build_times_cdf(&initial, timeout2)) < 0.05); /* Generate MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS timeouts * and 1-that regular values. Then check for timeout error @@ -3491,10 +3491,10 @@ test_circuit_timeout(void) for (i = 0; i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS; i++) { circuit_build_times_add_timeout_worker(&estimate, - BUILDTIMEOUT_QUANTILE_CUTOFF); + BUILDTIMEOUT_QUANTILE_CUTOFF); if (i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS-1) { circuit_build_times_add_timeout_worker(&final, - BUILDTIMEOUT_QUANTILE_CUTOFF); + BUILDTIMEOUT_QUANTILE_CUTOFF); } } -- cgit v1.2.3-54-g00ecf From 63be2df84f7d23aa92426d6253ff18840e152254 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Sun, 6 Sep 2009 20:43:02 -0700 Subject: Fix issues found by arma in review. --- src/or/circuitbuild.c | 3 +-- src/or/circuituse.c | 4 ---- src/or/config.c | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 50faac1702..a140f0ae67 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -306,8 +306,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, int tot_values = 0, N = 0; config_line_t *line; int i; - msg = NULL; /* XXX is this a bug? should be *msg, or we'll seg fault - * if we try to set it */ + *msg = NULL; circuit_build_times_init(cbt); /* We don't support decreasing the table size yet */ diff --git a/src/or/circuituse.c b/src/or/circuituse.c index f6a41665d1..7ca65bcc53 100644 --- a/src/or/circuituse.c +++ b/src/or/circuituse.c @@ -858,10 +858,8 @@ circuit_build_failed(origin_circuit_t *circ) break; case CIRCUIT_PURPOSE_C_INTRODUCING: /* at Alice, connecting to intro point */ - circuit_increment_failure_count(); /* Don't increment failure count, since Bob may have picked * the introduction point maliciously */ - /* XXX Mike, you didn't read my comment above! :) -RD */ /* Alice will pick a new intro point when this one dies, if * the stream in question still cares. No need to act here. */ break; @@ -873,10 +871,8 @@ circuit_build_failed(origin_circuit_t *circ) break; case CIRCUIT_PURPOSE_S_CONNECT_REND: /* at Bob, connecting to rend point */ - circuit_increment_failure_count(); /* Don't increment failure count, since Alice may have picked * the rendezvous point maliciously */ - /* XXX Mike, you didn't read my comment above! :) -RD */ log_info(LD_REND, "Couldn't connect to Alice's chosen rend point %s " "(%s hop failed).", diff --git a/src/or/config.c b/src/or/config.c index cd222595b5..7c2623eee9 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -409,7 +409,7 @@ static config_var_t _state_vars[] = { V(LastRotatedOnionKey, ISOTIME, NULL), V(LastWritten, ISOTIME, NULL), - V("TotalBuildTimes", UINT, NULL), + V(TotalBuildTimes, UINT, NULL), VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL), VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL), -- cgit v1.2.3-54-g00ecf From c9363df09ffa25ff01a3546a01d6bf204280913f Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Sun, 6 Sep 2009 21:05:17 -0700 Subject: Remove an assert. It seems to fire because of precision issues. Added more debug info to the warn to try to figure out for sure. --- src/or/circuitbuild.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index a140f0ae67..95ac2e4e64 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -464,8 +464,8 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, if (gentime < (build_time_t)cbt->timeout*1000) { log_warn(LD_CIRC, "Generated a synthetic timeout LESS than the current timeout: " - "%u vs %d", gentime, cbt->timeout*1000); - tor_assert(gentime >= (build_time_t)cbt->timeout*1000); + "%u vs %d using Xm: %d a: %lf, q: %lf", + gentime, cbt->timeout*1000, cbt->Xm, cbt->alpha, quantile_cutoff); } else if (gentime > BUILD_TIME_MAX) { gentime = BUILD_TIME_MAX; log_info(LD_CIRC, -- cgit v1.2.3-54-g00ecf From 67cee75ca21cbe2c3659a15f50957af8014c2ec0 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 8 Sep 2009 01:31:29 -0700 Subject: Document functions and constants. --- src/or/circuitbuild.c | 120 +++++++++++++++++++++++++++++++++++++++++--------- src/or/or.h | 32 ++++++++++---- 2 files changed, 122 insertions(+), 30 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 95ac2e4e64..b139ee9275 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -112,7 +112,12 @@ circuitbuild_running_unit_tests(void) unit_tests = 1; } -/** DOCDOC */ +/** + * Reset the build time state. + * + * Leave estimated paramters, timeout, and network liveness in tact + * for future use. + */ void circuit_build_times_reset(circuit_build_times_t *cbt) { @@ -120,10 +125,15 @@ circuit_build_times_reset(circuit_build_times_t *cbt) cbt->pre_timeouts = 0; cbt->total_build_times = 0; cbt->build_times_idx = 0; - cbt->computed = 0; + cbt->have_computed_timeout = 0; } -/** DOCDOC */ +/** + * Initialize the buildtimes structure for first use. + * + * Sets the initial timeout value based to either the + * config setting or BUILD_TIMEOUT_INITIAL_VALUE. + */ void circuit_build_times_init(circuit_build_times_t *cbt) { @@ -137,10 +147,11 @@ circuit_build_times_init(circuit_build_times_t *cbt) } /** - * circuit_build_times is a circular array, so loop around when - * array is full + * Add a timeoutout value to the set of build times. Time units + * are milliseconds * - * time units are milliseconds + * circuit_build_times is a circular array, so loop around when + * array is full. */ int circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) @@ -169,7 +180,7 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) } /** - * Find maximum circuit build time + * Return maximum circuit build time */ static build_time_t circuit_build_times_max(circuit_build_times_t *cbt) @@ -183,7 +194,7 @@ circuit_build_times_max(circuit_build_times_t *cbt) return max_build_time; } -/** DOCDOC */ +/** Return minimum circuit build time */ static build_time_t circuit_build_times_min(circuit_build_times_t *cbt) { @@ -201,7 +212,13 @@ circuit_build_times_min(circuit_build_times_t *cbt) } /** - * Calculate histogram + * Calculate and return a histogram for the set of build times. + * + * Returns an allocated array of histrogram bins representing + * the frequency of index*BUILDTIME_BIN_WIDTH millisecond + * build times. Also outputs the number of bins in nbins. + * + * The return value must be freed by the caller. */ static uint32_t * circuit_build_times_create_histogram(circuit_build_times_t *cbt, @@ -225,7 +242,11 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt, return histogram; } -/** DOCDOC */ +/** + * Return the most frequent build time (rounded to BUILDTIME_BIN_WIDTH ms). + * + * Ties go in favor of the slower time. + */ static build_time_t circuit_build_times_mode(circuit_build_times_t *cbt) { @@ -244,7 +265,8 @@ circuit_build_times_mode(circuit_build_times_t *cbt) } /** - * output a histogram of current circuit build times. + * Output a histogram of current circuit build times to + * the or_state_t state structure. */ void circuit_build_times_update_state(circuit_build_times_t *cbt, @@ -282,7 +304,11 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, if (histogram) tor_free(histogram); } -/* Stolen from http://en.wikipedia.org/wiki/Fisher\u2013Yates_shuffle */ +/** + * Shuffle the build times array. + * + * Stolen from http://en.wikipedia.org/wiki/Fisher\u2013Yates_shuffle + */ static void circuit_build_times_shuffle_array(circuit_build_times_t *cbt) { @@ -298,7 +324,13 @@ circuit_build_times_shuffle_array(circuit_build_times_t *cbt) } } -/** Load histogram from state. DOCDOC what else? */ +/** + * Load histogram from state, shuffling the resulting array + * after we do so. Use this result to estimate parameters and + * calculate the timeout. + * + * Returns -1 and sets msg on error. Msg must be freed by the caller. + */ int circuit_build_times_parse_state(circuit_build_times_t *cbt, or_state_t *state, char **msg) @@ -368,7 +400,15 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, return msg ? -1 : 0; } -/** DOCDOC */ +/** + * Estimates the Xm and Alpha parameters using + * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation + * + * The notable difference is that we use mode instead of min to estimate Xm. + * This is because our distribution is frechet-like. We claim this is + * an acceptable approximation because we are only concerned with the + * accuracy of the CDF of the tail. + */ void circuit_build_times_update_alpha(circuit_build_times_t *cbt) { @@ -407,8 +447,8 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) * in the distribution such that F(x) = quantile (ie quantile*100% * of the mass of the density function is below x on the curve). * - * We use it to calculate the timeout and also synthetic values of - * time for circuits that timeout before completion. + * We use it to calculate the timeout and also to generate synthetic + * values of time for circuits that timeout before completion. * * See http://en.wikipedia.org/wiki/Quantile_function, * http://en.wikipedia.org/wiki/Inverse_transform_sampling and @@ -428,7 +468,7 @@ circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, return ret; } -/* Pareto CDF */ +/** Pareto CDF */ double circuit_build_times_cdf(circuit_build_times_t *cbt, double x) { @@ -437,6 +477,12 @@ circuit_build_times_cdf(circuit_build_times_t *cbt, double x) return ret; } +/** + * Generate a synthetic time using our distribution parameters. + * + * The return value will be between q_lo and q_hi quantile points + * on the CDF. + */ build_time_t circuit_build_times_generate_sample(circuit_build_times_t *cbt, double q_lo, double q_hi) @@ -451,12 +497,12 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, return ret; } +/** Generate points in [cutoff, 1.0) on the CDF. */ void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff) { - /* Generate points in [cutoff, 1.0) on the CDF... We want to - * stay a bit short of 1.0 though, because longtail is + /* We want to stay a bit short of 1.0, because longtail is * loooooooooooooooooooooooooooooooooooooooooooooooooooong */ build_time_t gentime = circuit_build_times_generate_sample(cbt, quantile_cutoff, 0.98); @@ -478,6 +524,10 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, circuit_build_times_add_time(cbt, gentime); } +/** + * Estimate an initial alpha parameter by solving the quantile + * function with a quantile point and a specific timeout value. + */ void circuit_build_times_initial_alpha(circuit_build_times_t *cbt, double quantile, build_time_t timeout) @@ -491,6 +541,10 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout)); } +/** + * Generate synthetic timeout values for the timeouts + * that have happened before we estimated our parameters. + */ static void circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) { @@ -521,6 +575,10 @@ circuit_build_times_needs_circuits(circuit_build_times_t *cbt) return 0; } +/** + * Returns true if we should build a timeout test circuit + * right now. + */ int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt) { @@ -528,12 +586,19 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt) approx_time()-cbt->last_circ_at > BUILD_TIMES_TEST_FREQUENCY; } +/** + * Called to indicate that the network showed some signs of liveness. + */ void circuit_build_times_network_is_live(circuit_build_times_t *cbt) { cbt->network_last_live = approx_time(); } +/** + * Returns true if the network showed some sign of liveness + * in the past NETWORK_LIVE_INTERVAL. + */ int circuit_build_times_is_network_live(circuit_build_times_t *cbt) { @@ -546,6 +611,15 @@ circuit_build_times_is_network_live(circuit_build_times_t *cbt) return 1; } +/** + * Returns true if we have seen more than MAX_RECENT_TIMEOUT_RATE of + * the past RECENT_CIRCUITS time out. Used to detect if the network + * connection has changed significantly. + * + * Also resets the entire timeout history in this case and causes us + * to restart the process of building test circuits and estimating a + * new timeout. + */ int circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) { @@ -631,7 +705,7 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) return; } - if (!cbt->computed) { + if (!cbt->have_computed_timeout) { /* Store a timeout before we have enough data as special */ cbt->pre_timeouts++; return; @@ -641,6 +715,10 @@ circuit_build_times_add_timeout(circuit_build_times_t *cbt) circuit_build_times_add_timeout_worker(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); } +/** + * Estimate a new timeout based on history and set our timeout + * variable accordingly. + */ void circuit_build_times_set_timeout(circuit_build_times_t *cbt) { @@ -659,7 +737,7 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); - cbt->computed = 1; + cbt->have_computed_timeout = 1; cbt->timeout = lround(timeout/1000.0); log_info(LD_CIRC, diff --git a/src/or/or.h b/src/or/or.h index 5462784503..b27526fb84 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2858,29 +2858,43 @@ void bridges_retry_all(void); void entry_guards_free_all(void); /* Circuit Build Timeout "public" functions and structures. */ + +/** How many circuits count as recent when deciding if the + * connection has changed. */ #define RECENT_CIRCUITS 20 -#define MIN_CIRCUITS_TO_OBSERVE 500 -#define NCIRCUITS_TO_OBSERVE 5000 /* approx 1.5 weeks worth of circuits */ -#define BUILDTIME_BIN_WIDTH 50 +/** Maximum fraction of timeouts to tolerate in the past + * RECENT_CIRCUITS before calculating a new timeout */ #define MAX_RECENT_TIMEOUT_RATE 0.7999999 -/* TODO: This should be moved to the consensus */ +/** Minimum circuits before estimating a timeout */ +#define MIN_CIRCUITS_TO_OBSERVE 500 + +/** Total size of the circuit timeout history to accumulate. + * 5000 is approx 1.5 weeks worth of continual-use circuits. */ +#define NCIRCUITS_TO_OBSERVE 5000 + +/** Width of the histogram bins in milliseconds */ +#define BUILDTIME_BIN_WIDTH 50 + +/** Cuttof point on the CDF for our timeout estimation. + * TODO: This should be moved to the consensus */ #define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 typedef uint32_t build_time_t; #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) -/* Have we received a cell in the last 90 seconds? */ +/** Have we received a cell in the last 90 seconds? */ #define NETWORK_LIVE_INTERVAL 90 +/** Initial circuit build timeout */ #define BUILD_TIMEOUT_INITIAL_VALUE 60 -/* How often in seconds should we build a test circuit */ +/** How often in seconds should we build a test circuit */ #define BUILD_TIMES_TEST_FREQUENCY 60 -/* Save state every 5 circuits */ -#define BUILD_TIMES_SAVE_STATE_EVERY 5 +/** Save state every 10 circuits */ +#define BUILD_TIMES_SAVE_STATE_EVERY 10 typedef struct { build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; @@ -2891,7 +2905,7 @@ typedef struct { int pre_timeouts; build_time_t Xm; double alpha; - int computed; + int have_computed_timeout; int timeout; } circuit_build_times_t; -- cgit v1.2.3-54-g00ecf From 742e08046ff52fe4c6b0c33ac759eb218f0228f4 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 9 Sep 2009 00:01:57 -0700 Subject: Fix bugs relating to not counting timeouts as circuit builds. Also use bin midpoints for time values. --- src/or/circuitbuild.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index b139ee9275..05aca5fd58 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -261,7 +261,7 @@ circuit_build_times_mode(circuit_build_times_t *cbt) tor_free(histogram); - return max_bin*BUILDTIME_BIN_WIDTH; + return max_bin*BUILDTIME_BIN_WIDTH+BUILDTIME_BIN_WIDTH/2; } /** @@ -291,8 +291,8 @@ circuit_build_times_update_state(circuit_build_times_t *cbt, *next = line = tor_malloc_zero(sizeof(config_line_t)); line->key = tor_strdup("CircuitBuildTimeBin"); line->value = tor_malloc(25); - tor_snprintf(line->value, 25, "%d %d", i*BUILDTIME_BIN_WIDTH, - histogram[i]); + tor_snprintf(line->value, 25, "%d %d", + i*BUILDTIME_BIN_WIDTH+BUILDTIME_BIN_WIDTH/2, histogram[i]); next = &(line->next); } @@ -548,10 +548,12 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, static void circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) { - /* Store a timeout as a random position on this curve. */ + /* Store a timeout as a random position past the current + * cutoff on the pareto curve */ if (cbt->pre_timeouts) { double timeout_quantile = 1.0- - ((double)cbt->pre_timeouts)/cbt->total_build_times; + ((double)cbt->pre_timeouts)/ + (cbt->pre_timeouts+cbt->total_build_times); cbt->Xm = circuit_build_times_min(cbt); // Use current timeout to get an estimate on alpha circuit_build_times_initial_alpha(cbt, timeout_quantile, @@ -628,7 +630,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) double timeout; int i; - if (cbt->total_build_times < RECENT_CIRCUITS) { + if ((cbt->pre_timeouts+cbt->total_build_times) < RECENT_CIRCUITS) { return 0; } @@ -643,6 +645,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) timeout_rate++; } } + timeout_rate += cbt->pre_timeouts; timeout_rate /= RECENT_CIRCUITS; /* If more than 80% of our recent circuits are timing out, -- cgit v1.2.3-54-g00ecf From 09a75ad31609b30f9e4a057373f2c4f17427eaf9 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 10 Sep 2009 22:12:46 -0700 Subject: Time for some debugging by asserts. Got a negative timeout value on startup. Need to narrow it down. --- src/or/circuitbuild.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 05aca5fd58..4999078c90 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -460,7 +460,12 @@ double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, double quantile) { - double ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); + double ret; + tor_assert(quantile >= 0); + tor_assert(1.0-quantile > 0); + tor_assert(cbt->Xm > 0); + + ret = cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha); if (ret > INT32_MAX) { ret = INT32_MAX; } @@ -472,7 +477,9 @@ circuit_build_times_calculate_timeout(circuit_build_times_t *cbt, double circuit_build_times_cdf(circuit_build_times_t *cbt, double x) { - double ret = 1.0-pow(cbt->Xm/x,cbt->alpha); + double ret; + tor_assert(cbt->Xm > 0); + ret = 1.0-pow(cbt->Xm/x,cbt->alpha); tor_assert(0 <= ret && ret <= 1.0); return ret; } @@ -488,8 +495,13 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, double q_lo, double q_hi) { uint64_t r = crypto_rand_uint64(UINT64_MAX-1); - double u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT64_MAX); build_time_t ret; + double u; + + tor_assert(q_lo >= 0); + tor_assert(q_hi < 1); + + u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT64_MAX); tor_assert(0 <= u && u < 1.0); ret = lround(circuit_build_times_calculate_timeout(cbt, u)); @@ -538,7 +550,10 @@ circuit_build_times_initial_alpha(circuit_build_times_t *cbt, // CircBuildTimeout = Xm*((1-0.8))^(-1/a)) // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a) // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a + tor_assert(quantile > 0); + tor_assert(cbt->Xm > 0); cbt->alpha = ln(1.0-quantile)/(ln(cbt->Xm)-ln(timeout)); + tor_assert(cbt->alpha > 0); } /** @@ -555,6 +570,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt) ((double)cbt->pre_timeouts)/ (cbt->pre_timeouts+cbt->total_build_times); cbt->Xm = circuit_build_times_min(cbt); + tor_assert(cbt->Xm > 0); // Use current timeout to get an estimate on alpha circuit_build_times_initial_alpha(cbt, timeout_quantile, cbt->timeout*1000); @@ -630,7 +646,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) double timeout; int i; - if ((cbt->pre_timeouts+cbt->total_build_times) < RECENT_CIRCUITS) { + if ((cbt->pre_timeouts + cbt->total_build_times) < RECENT_CIRCUITS) { return 0; } @@ -656,7 +672,8 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) log_notice(LD_CIRC, "Network connection speed appears to have changed. " - "Resetting timeouts."); + "Resetting timeouts after %d pretimouts and %d buildtimes", + cbt->pre_timeouts, cbt->total_build_times); if (Xm >= (build_time_t)cbt->timeout*1000) { Xm = circuit_build_times_min(cbt); @@ -669,6 +686,7 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) goto reset; } } + tor_assert(Xm > 0); cbt->Xm = Xm; circuit_build_times_initial_alpha(cbt, 1.0-timeout_rate, -- cgit v1.2.3-54-g00ecf From 0352d4391756cf80c220d9daf49f2cbc55856e89 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Mon, 14 Sep 2009 04:03:57 -0700 Subject: Move circuitbuildtimeout config check. We want it to be under our control so it doesn't mess up initialization. This is likely the cause for the bug the previous assert-adding commit (09a75ad) was trying to address. --- src/or/circuitbuild.c | 17 +++++++++++++++++ src/or/config.c | 11 ----------- src/or/or.h | 3 +++ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 4999078c90..a141795a1e 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -141,6 +141,11 @@ circuit_build_times_init(circuit_build_times_t *cbt) if (!unit_tests && get_options()->CircuitBuildTimeout) { cbt->timeout = get_options()->CircuitBuildTimeout; + if (cbt->timeout < BUILD_TIMEOUT_MIN_VALUE) { + log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %d", + BUILD_TIMEOUT_MIN_VALUE); + cbt->timeout = BUILD_TIMEOUT_MIN_VALUE; + } } else { cbt->timeout = BUILD_TIMEOUT_INITIAL_VALUE; } @@ -697,6 +702,12 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) cbt->timeout = lround(timeout/1000.0); + if (cbt->timeout < BUILD_TIMEOUT_MIN_VALUE) { + log_warn(LD_CIRC, "Reset buildtimeout to low value %lf. Setting to %d", + timeout, BUILD_TIMEOUT_MIN_VALUE); + cbt->timeout = BUILD_TIMEOUT_MIN_VALUE; + } + log_notice(LD_CIRC, "Reset circuit build timeout to %d (%lf, Xm: %d, a: %lf) based " "on %d recent circuit times", cbt->timeout, timeout, cbt->Xm, @@ -761,6 +772,12 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) cbt->have_computed_timeout = 1; cbt->timeout = lround(timeout/1000.0); + if (cbt->timeout < BUILD_TIMEOUT_MIN_VALUE) { + log_warn(LD_CIRC, "Set buildtimeout to low value %lf. Setting to %d", + timeout, BUILD_TIMEOUT_MIN_VALUE); + cbt->timeout = BUILD_TIMEOUT_MIN_VALUE; + } + log_info(LD_CIRC, "Set circuit build timeout to %d (%lf, Xm: %d, a: %lf) based on " "%d circuit times", cbt->timeout, timeout, cbt->Xm, cbt->alpha, diff --git a/src/or/config.c b/src/or/config.c index 7c2623eee9..0712fbee7d 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -2919,11 +2919,6 @@ compute_publishserverdescriptor(or_options_t *options) /** Highest allowable value for RendPostPeriod. */ #define MAX_DIR_PERIOD (MIN_ONION_KEY_LIFETIME/2) -/** Lowest allowable value for CircuitBuildTimeout; values too low will - * increase network load because of failing connections being retried, and - * might prevent users from connecting to the network at all. */ -#define MIN_CIRCUIT_BUILD_TIMEOUT 3 - /** Lowest allowable value for MaxCircuitDirtiness; if this is too low, Tor * will generate too many circuits and potentially overload the network. */ #define MIN_MAX_CIRCUIT_DIRTINESS 10 @@ -3370,12 +3365,6 @@ options_validate(or_options_t *old_options, or_options_t *options, options->RendPostPeriod = MAX_DIR_PERIOD; } - if (options->CircuitBuildTimeout < MIN_CIRCUIT_BUILD_TIMEOUT) { - log(LOG_WARN, LD_CONFIG, "CircuitBuildTimeout option is too short; " - "raising to %d seconds.", MIN_CIRCUIT_BUILD_TIMEOUT); - options->CircuitBuildTimeout = MIN_CIRCUIT_BUILD_TIMEOUT; - } - if (options->MaxCircuitDirtiness < MIN_MAX_CIRCUIT_DIRTINESS) { log(LOG_WARN, LD_CONFIG, "MaxCircuitDirtiness option is too short; " "raising to %d seconds.", MIN_MAX_CIRCUIT_DIRTINESS); diff --git a/src/or/or.h b/src/or/or.h index b27526fb84..4ee5abf21d 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2887,6 +2887,9 @@ typedef uint32_t build_time_t; /** Have we received a cell in the last 90 seconds? */ #define NETWORK_LIVE_INTERVAL 90 +/** Lowest allowable value for CircuitBuildTimeout */ +#define BUILD_TIMEOUT_MIN_VALUE 3 + /** Initial circuit build timeout */ #define BUILD_TIMEOUT_INITIAL_VALUE 60 -- cgit v1.2.3-54-g00ecf From 81dc435ffaa86382c7702a5d7c7460635225dcb7 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 16 Sep 2009 17:03:54 -0700 Subject: Update proposal to match implementation. --- .../proposals/151-path-selection-improvements.txt | 85 +++++++++++----------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/doc/spec/proposals/151-path-selection-improvements.txt b/doc/spec/proposals/151-path-selection-improvements.txt index 7821a5dddb..94e964b017 100644 --- a/doc/spec/proposals/151-path-selection-improvements.txt +++ b/doc/spec/proposals/151-path-selection-improvements.txt @@ -20,7 +20,7 @@ Motivation Implementation - Storing Build Times + Gathering Build Times Circuit build times are stored in the circular array 'circuit_build_times' consisting of uint32_t elements as milliseconds. @@ -30,8 +30,16 @@ Implementation too large, because it will make it difficult for clients to adapt to moving between different links. - From our observations, this value appears to be on the order of 1000, - but is configurable in a #define NCIRCUITS_TO_OBSERVE. + From our observations, the minimum value for a reasonable fit appears + to be on the order of 500 (MIN_CIRCUITS_TO_OBSERVE). However, to keep + a good fit over the long term, we store 5000 most recent circuits in + the array (NCIRCUITS_TO_OBSERVE). + + The Tor client will build test circuits at a rate of one per + minute (BUILD_TIMES_TEST_FREQUENCY) up to the point of + MIN_CIRCUITS_TO_OBSERVE. This allows a fresh Tor to have + a CircuitBuildTimeout estimated within 8 hours after install, + upgrade, or network change (see below). Long Term Storage @@ -43,9 +51,9 @@ Implementation Example: TotalBuildTimes 100 - CircuitBuildTimeBin 0 50 - CircuitBuildTimeBin 50 25 - CircuitBuildTimeBin 100 13 + CircuitBuildTimeBin 25 50 + CircuitBuildTimeBin 75 25 + CircuitBuildTimeBin 125 13 ... Reading the histogram in will entail inserting values @@ -57,7 +65,12 @@ Implementation Learning the CircuitBuildTimeout Based on studies of build times, we found that the distribution of - circuit buildtimes appears to be a Pareto distribution. + circuit buildtimes appears to be a Frechet distribution. However, + estimators and quantile functions of the Frechet distribution are + difficult to work with and slow to converge. So instead, since we + are only interested in the accuracy of the tail, we approximate + the tail of the distribution with a Pareto curve starting at + the mode of the circuit build time sample set. We will calculate the parameters for a Pareto distribution fitting the data using the estimators at @@ -73,11 +86,8 @@ Implementation Detecting Changing Network Conditions - We attempt to detect both network connectivty loss and drastic - changes in the timeout characteristics. Network connectivity loss - is detected by recording a timestamp every time Tor either completes - a TLS connection or receives a cell. If this timestamp is more than - 90 seconds in the past, circuit timeouts are no longer counted. + We attempt to detect both network connectivity loss and drastic + changes in the timeout characteristics. If more than MAX_RECENT_TIMEOUT_RATE (80%) of the past RECENT_CIRCUITS (20) time out, we assume the network connection @@ -86,6 +96,11 @@ Implementation position on the Pareto Quartile function for the ratio of timeouts. + Network connectivity loss is detected by recording a timestamp every + time Tor either completes a TLS connection or receives a cell. If + this timestamp is more than CircuitBuildTimeout*RECENT_CIRCUITS/3 + seconds in the past, circuit timeouts are no longer counted. + Testing After circuit build times, storage, and learning are implemented, @@ -96,44 +111,28 @@ Implementation the python produces matches that which is output to the state file in Tor, and verify that the Pareto parameters and cutoff points also match. - Soft timeout vs Hard Timeout - - At some point, it may be desirable to change the cutoff from a - single hard cutoff that destroys the circuit to a soft cutoff and - a hard cutoff, where the soft cutoff merely triggers the building - of a new circuit, and the hard cutoff triggers destruction of the - circuit. - - Good values for hard and soft cutoffs seem to be 80% and 60% - respectively, but we should eventually justify this with observation. - - When to Begin Calculation - - The number of circuits to observe (NCIRCUITS_TO_CUTOFF) before - changing the CircuitBuildTimeout will be tunable via a #define. From - our measurements, a good value for NCIRCUITS_TO_CUTOFF appears to be - on the order of 100. + We will also verify that there are no unexpected large deviations from + node selection, such as nodes from distant geographical locations being + completely excluded. Dealing with Timeouts Timeouts should be counted as the expectation of the region of - of the Pareto distribution beyond the cutoff. The proposal will - be updated with this value soon. + of the Pareto distribution beyond the cutoff. This is done by + generating a random sample for each timeout at points on the + curve beyond the current timeout cutoff. - Also, in the event of network failure, the observation mechanism - should stop collecting timeout data. + Future Work - Client Hints - - Some research still needs to be done to provide initial values - for CircuitBuildTimeout based on values learned from modem - users, DSL users, Cable Modem users, and dedicated links. A - radiobutton in Vidalia should eventually be provided that - sets CircuitBuildTimeout to one of these values and also - provide the option of purging all learned data, should any exist. + At some point, it may be desirable to change the cutoff from a + single hard cutoff that destroys the circuit to a soft cutoff and + a hard cutoff, where the soft cutoff merely triggers the building + of a new circuit, and the hard cutoff triggers destruction of the + circuit. - These values can either be published in the directory, or - shipped hardcoded for a particular Tor version. + It may also be beneficial to learn separate timeouts for each + guard node, as they will have slightly different distributions. + This will take longer to generate initial values though. Issues -- cgit v1.2.3-54-g00ecf From 5bd60d8a411c90000e6a55e70eb814ea6c69e011 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 15 Sep 2009 18:00:48 -0700 Subject: Address nickm's issues from his review #1. --- src/or/circuitbuild.c | 14 +++++++++----- src/or/or.h | 14 +++++++++++++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index a141795a1e..25b81992b9 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -88,7 +88,7 @@ static smartlist_t *entry_guards = NULL; static int entry_guards_dirty = 0; /** If set, we're running the unit tests: we should avoid clobbering - * our state file. */ + * our state file or accessing get_options() or get_or_state() */ static int unit_tests = 0; /********* END VARIABLES ************/ @@ -427,11 +427,15 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) cbt->Xm = circuit_build_times_mode(cbt); for (i=0; i< NCIRCUITS_TO_OBSERVE; i++) { - if (!x[i]) continue; + if (!x[i]) { + continue; + } - // Hrmm, should we count < Xm as Xm or just drop - if (x[i] < cbt->Xm) a += ln(cbt->Xm); - else a += ln(x[i]); + if (x[i] < cbt->Xm) { + a += ln(cbt->Xm); + } else { + a += ln(x[i]); + } n++; } diff --git a/src/or/or.h b/src/or/or.h index 4ee5abf21d..a343b1f4b1 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2875,12 +2875,13 @@ void entry_guards_free_all(void); #define NCIRCUITS_TO_OBSERVE 5000 /** Width of the histogram bins in milliseconds */ -#define BUILDTIME_BIN_WIDTH 50 +#define BUILDTIME_BIN_WIDTH ((build_time_t)50) /** Cuttof point on the CDF for our timeout estimation. * TODO: This should be moved to the consensus */ #define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8 +/** A build_time_t is milliseconds */ typedef uint32_t build_time_t; #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) @@ -2900,15 +2901,26 @@ typedef uint32_t build_time_t; #define BUILD_TIMES_SAVE_STATE_EVERY 10 typedef struct { + /** The circular array of recorded build times in milliseconds */ build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE]; + /** The timestamp we last completed a TLS handshake or received a cell */ time_t network_last_live; + /** Last time we built a circuit. Used to decide to build new test circs */ time_t last_circ_at; + /** Current index in the circuit_build_times circular array */ int build_times_idx; + /** Total number of build times accumulated. Maxes at NCIRCUITS_TO_OBSERVE */ int total_build_times; + /** Number of timeouts that have happened before estimating pareto + * parameters */ int pre_timeouts; + /** "Minimum" value of our pareto distribution (actually mode) */ build_time_t Xm; + /** alpha exponent for pareto dis */ double alpha; + /** Have we computed a timeout? */ int have_computed_timeout; + /** The value for that timeout in seconds, not milliseconds */ int timeout; } circuit_build_times_t; -- cgit v1.2.3-54-g00ecf From e4e0ce94f0cd6ad18b9158e99936faeee5b6e092 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 16 Sep 2009 04:55:43 -0700 Subject: Add log message so we have accurate build time values. --- src/or/circuitbuild.c | 6 +++++- src/or/or.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 25b81992b9..a87ad2e290 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -170,6 +170,9 @@ circuit_build_times_add_time(circuit_build_times_t *cbt, build_time_t time) return -1; } + // XXX: Probably want to demote this to debug for the release. + log_info(LD_CIRC, "Adding circuit build time %u", time); + cbt->circuit_build_times[cbt->build_times_idx] = time; cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE; if (cbt->total_build_times < NCIRCUITS_TO_OBSERVE) @@ -539,7 +542,8 @@ circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, "Generated a synthetic timeout larger than the max: %u", gentime); } else { - log_info(LD_CIRC, "Generated synthetic time %u for timeout", gentime); + log_info(LD_CIRC, "Generated synthetic circuit build time %u for timeout", + gentime); } circuit_build_times_add_time(cbt, gentime); diff --git a/src/or/or.h b/src/or/or.h index a343b1f4b1..fb4ab8452f 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2916,7 +2916,7 @@ typedef struct { int pre_timeouts; /** "Minimum" value of our pareto distribution (actually mode) */ build_time_t Xm; - /** alpha exponent for pareto dis */ + /** alpha exponent for pareto dist. */ double alpha; /** Have we computed a timeout? */ int have_computed_timeout; -- cgit v1.2.3-54-g00ecf From e2c2fa7a1f763b1815adc13bdea46d4dbdba78c1 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 16 Sep 2009 17:14:01 -0700 Subject: Change liveness value to be a function of the timeout. And also the number of recent circuits used to decide when the network changes. --- src/or/circuitbuild.c | 5 +++-- src/or/or.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index a87ad2e290..1cb1fd2144 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -628,13 +628,14 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt) /** * Returns true if the network showed some sign of liveness - * in the past NETWORK_LIVE_INTERVAL. + * in the past NETWORK_LIVE_MULTIPLIER*cbt->timeout seconds. */ int circuit_build_times_is_network_live(circuit_build_times_t *cbt) { time_t now = approx_time(); - if (now - cbt->network_last_live > NETWORK_LIVE_INTERVAL) { + if (now - cbt->network_last_live > + (cbt->timeout*NETWORK_LIVE_MULTIPLIER)) { log_info(LD_CIRC, "Network is no longer live. Dead for %ld seconds.", now - cbt->network_last_live); return 0; diff --git a/src/or/or.h b/src/or/or.h index fb4ab8452f..98fa841065 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2885,8 +2885,8 @@ void entry_guards_free_all(void); typedef uint32_t build_time_t; #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX)) -/** Have we received a cell in the last 90 seconds? */ -#define NETWORK_LIVE_INTERVAL 90 +/** Have we received a cell in the last N seconds? */ +#define NETWORK_LIVE_MULTIPLIER (RECENT_CIRCUITS/2.5) /** Lowest allowable value for CircuitBuildTimeout */ #define BUILD_TIMEOUT_MIN_VALUE 3 -- cgit v1.2.3-54-g00ecf From 1aac7de1ea8cacf2d93f25dc7a4366350035c91c Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Thu, 17 Sep 2009 00:20:25 +0200 Subject: Fix unit tests and compile issues on Snow Leopard --- src/or/circuitbuild.c | 27 +++++++++++++++++---------- src/or/test.c | 4 ++-- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 1cb1fd2144..c20db4b205 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -368,13 +368,15 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, uint32_t count, k; build_time_t ms; int ok; - ms = tor_parse_ulong(ms_str, 0, 0, BUILD_TIME_MAX, &ok, NULL); + ms = (build_time_t)tor_parse_ulong(ms_str, 0, 0, + BUILD_TIME_MAX, &ok, NULL); if (!ok) { *msg = tor_strdup("Unable to parse circuit build times: " "Unparsable bin number"); break; } - count = tor_parse_ulong(count_str, 0, 0, UINT32_MAX, &ok, NULL); + count = (uint32_t)tor_parse_ulong(count_str, 0, 0, + UINT32_MAX, &ok, NULL); if (!ok) { *msg = tor_strdup("Unable to parse circuit build times: " "Unparsable bin count"); @@ -405,7 +407,7 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, tor_assert(cbt->total_build_times == state->TotalBuildTimes); tor_assert(tot_values == cbt->total_build_times); circuit_build_times_set_timeout(cbt); - return msg ? -1 : 0; + return *msg ? -1 : 0; } /** @@ -516,7 +518,8 @@ circuit_build_times_generate_sample(circuit_build_times_t *cbt, u = q_lo + ((q_hi-q_lo)*r)/(1.0*UINT64_MAX); tor_assert(0 <= u && u < 1.0); - ret = lround(circuit_build_times_calculate_timeout(cbt, u)); + /* circuit_build_times_calculate_timeout returns <= INT32_MAX */ + ret = (uint32_t)lround(circuit_build_times_calculate_timeout(cbt, u)); tor_assert(ret > 0); return ret; } @@ -704,12 +707,12 @@ circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt) cbt->Xm = Xm; circuit_build_times_initial_alpha(cbt, 1.0-timeout_rate, - cbt->timeout*1000.0); + cbt->timeout*1000); timeout = circuit_build_times_calculate_timeout(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF); - - cbt->timeout = lround(timeout/1000.0); + /* timeout is INT32_MAX at most */ + cbt->timeout = (uint32_t)lround(timeout/1000.0); if (cbt->timeout < BUILD_TIMEOUT_MIN_VALUE) { log_warn(LD_CIRC, "Reset buildtimeout to low value %lf. Setting to %d", @@ -779,7 +782,8 @@ circuit_build_times_set_timeout(circuit_build_times_t *cbt) BUILDTIMEOUT_QUANTILE_CUTOFF); cbt->have_computed_timeout = 1; - cbt->timeout = lround(timeout/1000.0); + /* timeout is INT32_MAX at most */ + cbt->timeout = (uint32_t)lround(timeout/1000.0); if (cbt->timeout < BUILD_TIMEOUT_MIN_VALUE) { log_warn(LD_CIRC, "Set buildtimeout to low value %lf. Setting to %d", @@ -1376,11 +1380,14 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) hop = onion_next_hop_in_cpath(circ->cpath); if (!hop) { struct timeval end; + long timediff; tor_gettimeofday(&end); + timediff = tv_mdiff(&circ->_base.highres_created, &end); + if (timediff > INT32_MAX) + timediff = INT32_MAX; /* done building the circuit. whew. */ circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN); - circuit_build_times_add_time(&circ_times, - tv_mdiff(&circ->_base.highres_created, &end)); + circuit_build_times_add_time(&circ_times, (uint32_t)timediff); circuit_build_times_set_timeout(&circ_times); log_info(LD_CIRC,"circuit built!"); circuit_reset_failure_count(0); diff --git a/src/or/test.c b/src/or/test.c index 06bfccece6..df7bead4b3 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3436,7 +3436,7 @@ test_circuit_timeout(void) memset(&state, 0, sizeof(or_state_t)); circuitbuild_running_unit_tests(); -#define timeout0 (30*1000.0) +#define timeout0 (build_time_t)(30*1000.0) initial.Xm = 750; circuit_build_times_initial_alpha(&initial, BUILDTIMEOUT_QUANTILE_CUTOFF, timeout0); @@ -3444,7 +3444,7 @@ test_circuit_timeout(void) int n = 0; for (i=0; i < MIN_CIRCUITS_TO_OBSERVE; i++) { if (circuit_build_times_add_time(&estimate, - circuit_build_times_generate_sample(&initial, 0, 1)) == 0) { + circuit_build_times_generate_sample(&initial, 0, .98)) == 0) { n++; } } -- cgit v1.2.3-54-g00ecf From 43c18746bd4be9db4ad68312e9e62031f056bc10 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Wed, 16 Sep 2009 18:41:22 -0700 Subject: Clarify use of magic number 0.98 with #define. --- src/or/circuitbuild.c | 4 +--- src/or/or.h | 5 +++++ src/or/test.c | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index c20db4b205..177852f91a 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -529,10 +529,8 @@ void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt, double quantile_cutoff) { - /* We want to stay a bit short of 1.0, because longtail is - * loooooooooooooooooooooooooooooooooooooooooooooooooooong */ build_time_t gentime = circuit_build_times_generate_sample(cbt, - quantile_cutoff, 0.98); + quantile_cutoff, MAX_SYNTHETIC_QUANTILE); if (gentime < (build_time_t)cbt->timeout*1000) { log_warn(LD_CIRC, diff --git a/src/or/or.h b/src/or/or.h index 98fa841065..bdb4d97924 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2867,6 +2867,11 @@ void entry_guards_free_all(void); * RECENT_CIRCUITS before calculating a new timeout */ #define MAX_RECENT_TIMEOUT_RATE 0.7999999 +/** Maximum quantile to use to generate synthetic timeouts. + * We want to stay a bit short of 1.0, because longtail is + * loooooooooooooooooooooooooooooooooooooooooooooooooooong. */ +#define MAX_SYNTHETIC_QUANTILE 0.98 + /** Minimum circuits before estimating a timeout */ #define MIN_CIRCUITS_TO_OBSERVE 500 diff --git a/src/or/test.c b/src/or/test.c index df7bead4b3..cf00c080d4 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3444,7 +3444,8 @@ test_circuit_timeout(void) int n = 0; for (i=0; i < MIN_CIRCUITS_TO_OBSERVE; i++) { if (circuit_build_times_add_time(&estimate, - circuit_build_times_generate_sample(&initial, 0, .98)) == 0) { + circuit_build_times_generate_sample(&initial, 0, + MAX_SYNTHETIC_QUANTILE)) == 0) { n++; } } -- cgit v1.2.3-54-g00ecf