summaryrefslogtreecommitdiff
path: root/src/or
diff options
context:
space:
mode:
authorMike Perry <mikeperry-git@fscked.org>2009-08-27 01:46:06 -0700
committerMike Perry <mikeperry-git@fscked.org>2009-09-16 15:48:52 -0700
commit04414830fe199d80bddf67c64e17d32d54a385e4 (patch)
treee62cd769904af16fd8e6268f431fac67e62ffb37 /src/or
parent7750bee21dda817611afd936558834bb21411301 (diff)
downloadtor-04414830fe199d80bddf67c64e17d32d54a385e4.tar.gz
tor-04414830fe199d80bddf67c64e17d32d54a385e4.zip
Implement the pareto fitting and timeout calculating bits.
Diffstat (limited to 'src/or')
-rw-r--r--src/or/Makefile.am4
-rw-r--r--src/or/circuitbuild.c285
-rw-r--r--src/or/circuituse.c2
-rw-r--r--src/or/config.c7
-rw-r--r--src/or/or.h28
5 files changed, 244 insertions, 82 deletions
diff --git a/src/or/Makefile.am b/src/or/Makefile.am
index 7d6c9eb0b9..e9916d5188 100644
--- a/src/or/Makefile.am
+++ b/src/or/Makefile.am
@@ -41,14 +41,14 @@ AM_CPPFLAGS = -DSHARE_DATADIR="\"$(datadir)\"" \
tor_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ @TOR_LDFLAGS_libevent@
tor_LDADD = ../common/libor.a ../common/libor-crypto.a \
../common/libor-event.a \
- -lz -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@
+ -lz -lm -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@
test_SOURCES = $(COMMON_SRC) test_data.c test.c
test_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ \
@TOR_LDFLAGS_libevent@
test_LDADD = ../common/libor.a ../common/libor-crypto.a \
../common/libor-event.a \
- -lz -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@
+ -lz -lm -levent -lssl -lcrypto @TOR_LIB_WS32@ @TOR_LIB_GDI@
noinst_HEADERS = or.h eventdns.h eventdns_tor.h micro-revision.i
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 7334308403..582567b7ee 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -9,14 +9,23 @@
* \brief The actual details of building circuits.
**/
+#include <math.h>
+
+long int lround(double x);
+
+double
+ln(double x)
+{
+ return log(x);
+}
+#undef log
+
#include "or.h"
+#include "crypto.h"
/********* START VARIABLES **********/
/** Global list of circuit build times */
-// XXX: Make this a smartlist..
-uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
-int build_times_idx = 0;
-int total_build_times = 0;
+circuit_build_times_t circ_times;
/** A global list of all circuits at this hop. */
extern circuit_t *global_circuitlist;
@@ -65,25 +74,24 @@ static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice);
static void entry_guards_changed(void);
static time_t start_of_month(time_t when);
-static int circuit_build_times_add_time(time_t time);
-/** circuit_build_times is a circular array, so loop around when
- * array is full
+/**
+ * circuit_build_times is a circular array, so loop around when
+ * array is full
*
* time units are milliseconds
*/
-static
int
-circuit_build_times_add_time(long time)
+circuit_build_times_add_time(circuit_build_times_t *cbt, long time)
{
- if(time > UINT16_MAX) {
+ if (time > UINT16_MAX) {
log_notice(LD_CIRC,
- "Circuit build time of %dms exceeds max. Capping at 65536ms", time);
+ "Circuit build time of %ldms exceeds max. Capping at 65536ms", time);
time = UINT16_MAX;
}
- circuit_build_times[build_times_idx] = time;
- build_times_idx = (build_times_idx + 1) % NCIRCUITS_TO_OBSERVE;
- if(total_build_times + 1 < NCIRCUITS_TO_OBSERVE)
- total_build_times++;
+ cbt->circuit_build_times[cbt->build_times_idx] = time;
+ cbt->build_times_idx = (cbt->build_times_idx + 1) % NCIRCUITS_TO_OBSERVE;
+ if (cbt->total_build_times + 1 < NCIRCUITS_TO_OBSERVE)
+ cbt->total_build_times++;
return 0;
}
@@ -91,15 +99,16 @@ circuit_build_times_add_time(long time)
/**
* Calculate histogram
*/
-void
-circuit_build_times_create_histogram(uint16_t * histogram)
+static void
+circuit_build_times_create_histogram(circuit_build_times_t *cbt,
+ uint16_t *histogram)
{
- int i, c;
- // calculate histogram
- for(i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
- if(circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */
+ int i, c;
+ // calculate histogram
+ for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
+ if (cbt->circuit_build_times[i] == 0) continue; /* 0 <-> uninitialized */
- c = (circuit_build_times[i] / BUILDTIME_BIN_WIDTH);
+ c = (cbt->circuit_build_times[i] / BUILDTIME_BIN_WIDTH);
histogram[c]++;
}
}
@@ -107,26 +116,29 @@ circuit_build_times_create_histogram(uint16_t * histogram)
/**
* Find maximum circuit build time
*/
-uint16_t
-circuit_build_times_max()
+static uint16_t
+circuit_build_times_max(circuit_build_times_t *cbt)
{
int i = 0, max_build_time = 0;
- for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
- if(circuit_build_times[i] > max_build_time)
- max_build_time = circuit_build_times[i];
+ for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
+ if (cbt->circuit_build_times[i] > max_build_time)
+ max_build_time = cbt->circuit_build_times[i];
}
return max_build_time;
}
-uint16_t
-circuit_build_times_min()
+static uint16_t
+circuit_build_times_min(circuit_build_times_t *cbt)
{
int i = 0;
uint16_t min_build_time = UINT16_MAX;
- for( i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
- if(circuit_build_times[i] && /* 0 <-> uninitialized */
- circuit_build_times[i] < min_build_time)
- min_build_time = circuit_build_times[i];
+ for (i = 0; i < NCIRCUITS_TO_OBSERVE; i++) {
+ if (cbt->circuit_build_times[i] && /* 0 <-> uninitialized */
+ cbt->circuit_build_times[i] < min_build_time)
+ min_build_time = cbt->circuit_build_times[i];
+ }
+ if (min_build_time == UINT16_MAX) {
+ log_warn(LD_CIRC, "No build times less than UIN16_MAX!");
}
return min_build_time;
}
@@ -135,85 +147,218 @@ circuit_build_times_min()
* output a histogram of current circuit build times
*/
void
-circuit_build_times_update_state(or_state_t * state)
+circuit_build_times_update_state(circuit_build_times_t *cbt,
+ or_state_t *state)
{
uint16_t max_build_time = 0, *histogram;
int i = 0, nbins = 0;
config_line_t **next, *line;
- max_build_time = circuit_build_times_max();
+ max_build_time = circuit_build_times_max(cbt);
nbins = 1 + (max_build_time / BUILDTIME_BIN_WIDTH);
histogram = tor_malloc_zero(nbins * sizeof(uint16_t));
- circuit_build_times_create_histogram(histogram);
+ circuit_build_times_create_histogram(cbt, histogram);
// write to state
config_free_lines(state->BuildtimeHistogram);
next = &state->BuildtimeHistogram;
*next = NULL;
- state->TotalBuildTimes = total_build_times;
+ state->TotalBuildTimes = cbt->total_build_times;
// total build times?
- for(i = 0; i < nbins; i++) {
- if(histogram[i] == 0) continue; // compress the histogram by skipping the blanks
+ for (i = 0; i < nbins; i++) {
+ // compress the histogram by skipping the blanks
+ if (histogram[i] == 0) continue;
*next = line = tor_malloc_zero(sizeof(config_line_t));
line->key = tor_strdup("CircuitBuildTimeBin");
- line->value = tor_malloc(20);
- tor_snprintf(line->value, 20, "%d %d", i*BUILDTIME_BIN_WIDTH,
+ line->value = tor_malloc(25);
+ tor_snprintf(line->value, 25, "%d %d", i*BUILDTIME_BIN_WIDTH,
histogram[i]);
next = &(line->next);
}
- if(!get_options()->AvoidDiskWrites)
+ if (!get_options()->AvoidDiskWrites)
or_state_mark_dirty(get_or_state(), 0);
- if(histogram) tor_free(histogram);
-}
-
-int
-find_next_available(int chosen)
-{// find index of next open slot in circuit_build_times
- int idx = 0;
- for(idx = (chosen + 1) % NCIRCUITS_TO_OBSERVE; idx < chosen;
- idx = ((idx + 1 ) % NCIRCUITS_TO_OBSERVE)) {
- if(circuit_build_times[idx] == 0) {
- return idx;
- }
- }
- return 0;
+ if (histogram) tor_free(histogram);
}
/** Load histogram from state */
int
-circuit_build_times_parse_state(or_state_t *state, char **msg)
+circuit_build_times_parse_state(circuit_build_times_t *cbt,
+ or_state_t *state, char **msg)
{
+ int tot_values = 0, lines = 0;
config_line_t *line;
msg = NULL;
- memset(circuit_build_times, 0, NCIRCUITS_TO_OBSERVE);
- total_build_times = state->TotalBuildTimes;
+ memset(cbt->circuit_build_times, 0, NCIRCUITS_TO_OBSERVE);
+ cbt->total_build_times = state->TotalBuildTimes;
- for(line = state->BuildtimeHistogram; line; line = line->next) {
+ for (line = state->BuildtimeHistogram; line; line = line->next) {
smartlist_t * args = smartlist_create();
smartlist_split_string(args, line->value, " ",
SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 0);
- if(smartlist_len(args) < 2) {
+ if (smartlist_len(args) < 2) {
*msg = tor_strdup("Unable to parse circuit build times: "
"Too few arguments to CircuitBuildTIme");
break;
} else {
- uint16_t ms, count, i;
- /* XXX: use tor_strtol */
- ms = atol(smartlist_get(args,0));
- count = atol(smartlist_get(args,1));
- for(i = 0; i < count; i++) {
- circuit_build_times_add_time(ms);
+ const char *ms_str = smartlist_get(args,0);
+ const char *count_str = smartlist_get(args,1);
+ uint32_t count, i;
+ uint16_t ms;
+ int ok;
+ ms = tor_parse_ulong(ms_str, 0, 0, UINT16_MAX, &ok, NULL);
+ if (!ok) {
+ *msg = tor_strdup("Unable to parse circuit build times: "
+ "Unparsable bin number");
+ break;
+ }
+ count = tor_parse_ulong(count_str, 0, 0, UINT32_MAX, &ok, NULL);
+ if (!ok) {
+ *msg = tor_strdup("Unable to parse circuit build times: "
+ "Unparsable bin count");
+ break;
+ }
+ lines++;
+ for (i = 0; i < count; i++) {
+ circuit_build_times_add_time(cbt, ms);
+ tot_values++;
}
}
}
+ log_info(LD_CIRC, "Loaded %d values from %d lines in circuit time histogram",
+ tot_values, lines);
+ circuit_build_times_set_timeout(cbt);
return (msg ? -1 : 0);
}
+static void
+circuit_build_times_update_alpha(circuit_build_times_t *cbt)
+{
+ uint16_t *x=cbt->circuit_build_times;
+ double a = 0;
+ int n=0,i=0;
+ /* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
+ cbt->Xm = circuit_build_times_min(cbt);
+
+ for (i=0; i< NCIRCUITS_TO_OBSERVE; i++) {
+ if (!x[i]) continue;
+ a += ln(x[i]);
+ n++;
+ }
+ tor_assert(n==cbt->total_build_times);
+ a -= n*ln(cbt->Xm);
+ a = n/a;
+
+ cbt->alpha = a;
+}
+/**
+ * This is the Pareto Quantile Function. It calculates the point x
+ * in the distribution such that F(x) < quantile (ie quantile*100%
+ * of the mass of the density function is below x on the curve).
+ *
+ * We use it to calculate the timeout and also synthetic values of
+ * time for circuits that timeout before completion.
+ *
+ * See http://en.wikipedia.org/wiki/Quantile_function,
+ * http://en.wikipedia.org/wiki/Inverse_transform_sampling and
+ * http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation
+ * That's right. I'll cite wikipedia all day long.
+ */
+static double
+circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
+ double quantile)
+{
+ return cbt->Xm/pow(1.0-quantile,1.0/cbt->alpha);
+}
+
+static void
+circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt)
+{
+ /* Generate 0.8-1.0... */
+ uint64_t r = crypto_rand_uint64(UINT64_MAX-1);
+ double u = BUILDTIMEOUT_QUANTILE_CUTOFF +
+ ((1.0-BUILDTIMEOUT_QUANTILE_CUTOFF)*r)/(1.0*UINT64_MAX);
+
+ long gentime = lround(circuit_build_times_calculate_timeout(cbt, u));
+
+ if (gentime < get_options()->CircuitBuildTimeout*1000) {
+ log_warn(LD_CIRC,
+ "Generated a synthetic timeout LESS than the current timeout: %ld vs %d",
+ gentime, get_options()->CircuitBuildTimeout*1000);
+ tor_assert(gentime >= get_options()->CircuitBuildTimeout*1000);
+ } else if (gentime > UINT16_MAX) {
+ gentime = UINT16_MAX;
+ log_info(LD_CIRC,
+ "Generated a synthetic timeout LESS than the current timeout: %ld vs %d",
+ gentime, get_options()->CircuitBuildTimeout*1000);
+ } else {
+ log_info(LD_CIRC, "Generated synthetic time %ld for timeout",
+ gentime);
+ }
+
+ circuit_build_times_add_time(cbt, gentime);
+}
+
+/**
+ * Store a timeout as a synthetic value
+ */
+void
+circuit_build_times_add_timeout(circuit_build_times_t *cbt)
+{
+ if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
+ /* Store a timeout before we have enough data as special */
+ cbt->pre_timeouts++;
+ return;
+ }
+
+ /* Store a timeout as a random position on this curve */
+ if (cbt->pre_timeouts && get_options()->CircuitBuildTimeout != 60) {
+ cbt->Xm = circuit_build_times_min(cbt);
+ // Use current timeout to get an estimate on alpha
+ // Q(u) = Xm/((1-u)^(1/a))
+ // Q(0.8) = Xm/((1-0.8))^(1/a)) = CircBuildTimeout
+ // CircBuildTimeout = Xm/((1-0.8))^(1/a))
+ // CircBuildTimeout = Xm*((1-0.8))^(-1/a))
+ // ln(CircBuildTimeout) = ln(Xm)+ln(((1-0.8)))*(-1/a)
+ // -ln(1-0.8)/(ln(CircBuildTimeout)-ln(Xm))=a
+ cbt->alpha = -ln(1-BUILDTIMEOUT_QUANTILE_CUTOFF)/
+ (ln(get_options()->CircuitBuildTimeout)-ln(cbt->Xm));
+ while (cbt->pre_timeouts-- != 0) {
+ circuit_build_times_add_timeout_worker(cbt);
+ }
+ }
+
+ cbt->pre_timeouts = 0;
+ circuit_build_times_add_timeout_worker(cbt);
+}
+
+void
+circuit_build_times_set_timeout(circuit_build_times_t *cbt)
+{
+ double timeout;
+
+ if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
+ log_info(LD_CIRC,
+ "Not enough circuits yet to calculate a new build timeout."
+ " Need %d more.",
+ MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times);
+ return;
+ }
+
+ circuit_build_times_update_alpha(cbt);
+ timeout = circuit_build_times_calculate_timeout(cbt,
+ BUILDTIMEOUT_QUANTILE_CUTOFF);
+
+ get_options()->CircuitBuildTimeout = lround(timeout/1000.0);
+
+ log_info(LD_CIRC,
+ "Set circuit build timeout to %d based on %d circuit times",
+ get_options()->CircuitBuildTimeout, cbt->total_build_times);
+}
/** Iterate over values of circ_id, starting from conn-\>next_circ_id,
* and with the high bit specified by conn-\>circ_id_type, until we get
@@ -800,9 +945,9 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
tor_gettimeofday(&end);
/* done building the circuit. whew. */
circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN);
- circuit_build_times_add_time(tor_mdiff(&circ->_base.timestamp_created,
- &end));
- circuit_build_times_recompute();
+ circuit_build_times_add_time(&circ_times,
+ tv_mdiff(&circ->_base.highres_created, &end));
+ circuit_build_times_set_timeout(&circ_times);
log_info(LD_CIRC,"circuit built!");
circuit_reset_failure_count(0);
if (circ->build_state->onehop_tunnel)
diff --git a/src/or/circuituse.c b/src/or/circuituse.c
index ee2d0bbabf..e93d28df72 100644
--- a/src/or/circuituse.c
+++ b/src/or/circuituse.c
@@ -358,6 +358,8 @@ circuit_expire_building(time_t now)
circuit_log_path(LOG_INFO,LD_CIRC,TO_ORIGIN_CIRCUIT(victim));
circuit_mark_for_close(victim, END_CIRC_REASON_TIMEOUT);
+ circuit_build_times_add_timeout(&circ_times);
+ circuit_build_times_set_timeout(&circ_times);
}
}
diff --git a/src/or/config.c b/src/or/config.c
index 0345ca7281..39a4ac139d 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -413,7 +413,6 @@ static config_var_t _state_vars[] = {
VAR("CircuitBuildTimeBin", LINELIST_S, BuildtimeHistogram, NULL),
VAR("BuildtimeHistogram", LINELIST_V, BuildtimeHistogram, NULL),
-
{ NULL, CONFIG_TYPE_OBSOLETE, 0, NULL }
};
@@ -2923,7 +2922,7 @@ compute_publishserverdescriptor(or_options_t *options)
/** Lowest allowable value for CircuitBuildTimeout; values too low will
* increase network load because of failing connections being retried, and
* might prevent users from connecting to the network at all. */
-#define MIN_CIRCUIT_BUILD_TIMEOUT 30
+#define MIN_CIRCUIT_BUILD_TIMEOUT 5
/** Lowest allowable value for MaxCircuitDirtiness; if this is too low, Tor
* will generate too many circuits and potentially overload the network. */
@@ -5070,7 +5069,7 @@ or_state_set(or_state_t *new_state)
tor_free(err);
}
- if(circuit_build_times_parse_state(global_state, &err) < 0) {
+ if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) {
log_warn(LD_GENERAL,"%s",err);
tor_free(err);
@@ -5208,7 +5207,7 @@ or_state_save(time_t now)
* to avoid redundant writes. */
entry_guards_update_state(global_state);
rep_hist_update_state(global_state);
- circuit_build_times_update_state(global_state);
+ circuit_build_times_update_state(&circ_times, global_state);
if (accounting_is_enabled(get_options()))
accounting_run_housekeeping(now);
diff --git a/src/or/or.h b/src/or/or.h
index be54ab4bf7..5e94a56e7c 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -1890,6 +1890,8 @@ typedef struct crypt_path_t {
#define NCIRCUITS_TO_OBSERVE 10000 /* approx 3 weeks worth of circuits */
#define BUILDTIME_BIN_WIDTH 50
+/* TODO: This should be moved to the consensus */
+#define BUILDTIMEOUT_QUANTILE_CUTOFF 0.8
/** Information used to build a circuit. */
typedef struct {
@@ -1984,7 +1986,7 @@ typedef struct circuit_t {
time_t timestamp_created; /**< When was this circuit created? */
time_t timestamp_dirty; /**< When the circuit was first used, or 0 if the
* circuit is clean. */
- struct timeval highres_created; /**< When exactly was this circuit created? */
+ struct timeval highres_created; /**< When exactly was the circuit created? */
uint16_t marked_for_close; /**< Should we close this circuit at the end of
* the main loop? (If true, holds the line number
@@ -2695,7 +2697,6 @@ typedef struct {
config_line_t * BuildtimeHistogram;
uint16_t TotalBuildTimes;
-
/** What version of Tor wrote this state file? */
char *TorVersion;
@@ -2865,10 +2866,25 @@ void bridges_retry_all(void);
void entry_guards_free_all(void);
-void circuit_build_times_update_state(or_state_t *state);
-int circuit_build_times_parse_state(or_state_t *state, char **msg);
-
-
+/* Circuit Build Timeout "public" functions (I love C... No wait.) */
+typedef struct {
+ // XXX: Make this a smartlist..
+ uint16_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
+ int build_times_idx;
+ int total_build_times;
+ int pre_timeouts;
+ uint16_t Xm;
+ double alpha;
+} circuit_build_times_t;
+
+extern circuit_build_times_t circ_times;
+void circuit_build_times_update_state(circuit_build_times_t *cbt,
+ or_state_t *state);
+int circuit_build_times_parse_state(circuit_build_times_t *cbt,
+ or_state_t *state, char **msg);
+void circuit_build_times_add_timeout(circuit_build_times_t *cbt);
+void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
+int circuit_build_times_add_time(circuit_build_times_t *cbt, long time);
/********************************* circuitlist.c ***********************/