summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2007-11-08 16:58:59 +0000
committerNick Mathewson <nickm@torproject.org>2007-11-08 16:58:59 +0000
commite0b9c893bc4e60cb3e6d8aa65d49a2a4ba2b6260 (patch)
tree5394c35c788ee05f37bfb75fc5feca7c12dbd11a
parent46273c908020144d683365b956bfa9950b815296 (diff)
downloadtor-e0b9c893bc4e60cb3e6d8aa65d49a2a4ba2b6260.tar.gz
tor-e0b9c893bc4e60cb3e6d8aa65d49a2a4ba2b6260.zip
r16573@catbus: nickm | 2007-11-08 11:57:16 -0500
Mess with the formula for the Guard flag again. Now it requires that you be in the most familiar 7/8 of nodes, and have above median wfu for that 7/8th. See spec for details. Also, log thresholds better. svn:r12440
-rw-r--r--ChangeLog4
-rw-r--r--doc/TODO2
-rw-r--r--doc/spec/dir-spec.txt19
-rw-r--r--src/common/container.c1
-rw-r--r--src/common/container.h6
-rw-r--r--src/or/dirserv.c67
-rw-r--r--src/or/or.h1
-rw-r--r--src/or/rephist.c32
8 files changed, 108 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index 527a709368..68c1540f96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -241,6 +241,10 @@ Changes in version 0.2.0.9-alpha - 2007-10-24
should exist before trying to replace the current one.
- Use a more forgiving schedule for retrying failed consensus
downloads than for other types.
+ - Adjust the guard selection formula that authorities use a little more:
+ require guards to be in the top 7/8 in terms of how long we have
+ known about them, and above the median of those nodes in terms of
+ weighted fractional uptime.
o Minor bugfixes (other directory issues):
- Correct the implementation of "download votes by digest." Bugfix on
diff --git a/doc/TODO b/doc/TODO
index 6fe91756d3..1c1d054a98 100644
--- a/doc/TODO
+++ b/doc/TODO
@@ -23,7 +23,7 @@ Items blocking 0.2.0.10-alpha:
after we've picked it". We should write a real proposal for this --
in 0.2.1.x.
- Delay the separation of flags till 0.2.1.x. -NM
- - Let's come up with a good formula for Guard.
+ o Let's come up with a good formula for Guard.
Here's a go:
diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt
index 9057eed25b..0e6343e6c3 100644
--- a/doc/spec/dir-spec.txt
+++ b/doc/spec/dir-spec.txt
@@ -972,11 +972,11 @@ $Id$
"Running" -- A router is 'Running' if the authority managed to connect to
it successfully within the last 30 minutes.
- "Stable" -- A router is 'Stable' if it is active, and either its
- Weighted MTBF is at least the median for known active routers or
- its Weighted MTBF is at least 10 days. Routers are never called Stable if
- they are running a version of Tor known to drop circuits stupidly.
- (0.1.1.10-alpha through 0.1.1.16-rc are stupid this way.)
+ "Stable" -- A router is 'Stable' if it is active, and either its Weighted
+ MTBF is at least the median for known active routers or its Weighted MTBF
+ corresponds to at least 7 days. Routers are never called Stable if they are
+ running a version of Tor known to drop circuits stupidly. (0.1.1.10-alpha
+ through 0.1.1.16-rc are stupid this way.)
To calculate weighted MTBF, compute the weighted mean of the lengths
of all intervals when the router was observed to be up, weighting
@@ -991,9 +991,9 @@ $Id$
either in the top 7/8ths for known active routers or at least 100KB/s.
"Guard" -- A router is a possible 'Guard' if its Weighted Fractional
- Uptime is at least the median for known active routers, and its bandwidth
- is either at least the median for known active routers or at least
- 250KB/s. If the total bandwidth of active non-BadExit Exit servers is less
+ Uptime is at least the median for "familiar" active routers, and if
+ its bandwidth is at least median or at least 250KB/s.
+ If the total bandwidth of active non-BadExit Exit servers is less
than one third of the total bandwidth of all active servers, no Exit is
listed as a Guard.
@@ -1001,6 +1001,9 @@ $Id$
of time that the router is up in any given day, weighting so that
downtime and uptime in the past counts less.
+ A node is 'familiar' if 1/8 of all active nodes have appeared more
+ recently than it, OR it has been around for a few weeks.
+
"Authority" -- A router is called an 'Authority' if the authority
generating the network-status document believes it is an authority.
diff --git a/src/common/container.c b/src/common/container.c
index 02e095f359..77d747caad 100644
--- a/src/common/container.c
+++ b/src/common/container.c
@@ -1214,4 +1214,5 @@ IMPLEMENT_ORDER_FUNC(find_nth_int, int)
IMPLEMENT_ORDER_FUNC(find_nth_time, time_t)
IMPLEMENT_ORDER_FUNC(find_nth_double, double)
IMPLEMENT_ORDER_FUNC(find_nth_uint32, uint32_t)
+IMPLEMENT_ORDER_FUNC(find_nth_long, long)
diff --git a/src/common/container.h b/src/common/container.h
index bbf654f5f2..6e1e1801c4 100644
--- a/src/common/container.h
+++ b/src/common/container.h
@@ -322,6 +322,7 @@ int find_nth_int(int *array, int n_elements, int nth);
time_t find_nth_time(time_t *array, int n_elements, int nth);
double find_nth_double(double *array, int n_elements, int nth);
uint32_t find_nth_uint32(uint32_t *array, int n_elements, int nth);
+long find_nth_long(long *array, int n_elements, int nth);
static INLINE int
median_int(int *array, int n_elements)
{
@@ -342,6 +343,11 @@ median_uint32(uint32_t *array, int n_elements)
{
return find_nth_uint32(array, n_elements, (n_elements-1)/2);
}
+static INLINE long
+median_long(long *array, int n_elements)
+{
+ return find_nth_long(array, n_elements, (n_elements-1)/2);
+}
#endif
diff --git a/src/or/dirserv.c b/src/or/dirserv.c
index a1ecdb8848..b0793cc118 100644
--- a/src/or/dirserv.c
+++ b/src/or/dirserv.c
@@ -1459,14 +1459,22 @@ should_generate_v2_networkstatus(void)
* current guards. */
#define UPTIME_TO_GUARANTEE_STABLE (3600*24*30)
/** If a router's MTBF is at least this value, then it is always stable.
- * See above. */
-#define MTBF_TO_GUARANTEE_STABLE (60*60*24*10)
+ * See above. (Corresponds to about 7 days for current decay rates.) */
+#define MTBF_TO_GUARANTEE_STABLE (60*60*24*5)
/** Similarly, we protect sufficiently fast nodes from being pushed
* out of the set of Fast nodes. */
#define BANDWIDTH_TO_GUARANTEE_FAST (100*1024)
/** Similarly, every node with sufficient bandwidth can be considered
* for Guard status. */
#define BANDWIDTH_TO_GUARANTEE_GUARD (250*1024)
+/** Similarly, every node with at least this much weighted time known can be
+ * considered familiar enough to be a guard. Corresponds to about 20 days for
+ * current decay rates.
+ */
+#define TIME_KNOWN_TO_GUARANTEE_FAMILIAR (8*24*60*60)
+/** Similarly, every node with sufficient WFU is around enough to be a guard.
+ */
+#define WFU_TO_GUARANTEE_GUARD (0.995)
/* Thresholds for server performance: set by
* dirserv_compute_performance_thresholds, and used by
@@ -1475,6 +1483,7 @@ static uint32_t stable_uptime = 0; /* start at a safe value */
static double stable_mtbf = 0.0;
static int enough_mtbf_info = 0;
static double guard_wfu = 0.0;
+static long guard_tk = 0;
static uint32_t fast_bandwidth = 0;
static uint32_t guard_bandwidth_including_exits = 0;
static uint32_t guard_bandwidth_excluding_exits = 0;
@@ -1514,13 +1523,13 @@ dirserv_thinks_router_is_unreliable(time_t now,
} else {
double mtbf =
rep_hist_get_stability(router->cache_info.identity_digest, now);
- if (mtbf < stable_mtbf && mtbf < MTBF_TO_GUARANTEE_STABLE)
+ if (mtbf < stable_mtbf)
return 1;
}
}
if (need_capacity) {
uint32_t bw = router_get_advertised_bandwidth(router);
- if (bw < fast_bandwidth && bw < BANDWIDTH_TO_GUARANTEE_FAST)
+ if (bw < fast_bandwidth)
return 1;
}
return 0;
@@ -1550,16 +1559,22 @@ dirserv_thinks_router_is_hs_dir(routerinfo_t *router, time_t now)
static void
dirserv_compute_performance_thresholds(routerlist_t *rl)
{
- int n_active, n_active_nonexit;
+ int n_active, n_active_nonexit, n_familiar;
uint32_t *uptimes, *bandwidths, *bandwidths_excluding_exits;
+ long *tks;
double *mtbfs, *wfus;
time_t now = time(NULL);
+ /* DOCDOC this is a little tricky; comment this function better. */
+
/* initialize these all here, in case there are no routers */
stable_uptime = 0;
+ stable_mtbf = 0;
fast_bandwidth = 0;
guard_bandwidth_including_exits = 0;
guard_bandwidth_excluding_exits = 0;
+ guard_tk = 0;
+ guard_wfu = 0;
total_bandwidth = 0;
total_exit_bandwidth = 0;
@@ -1570,6 +1585,7 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
bandwidths_excluding_exits =
tor_malloc(sizeof(uint32_t)*smartlist_len(rl->routers));
mtbfs = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
+ tks = tor_malloc(sizeof(long)*smartlist_len(rl->routers));
wfus = tor_malloc(sizeof(double)*smartlist_len(rl->routers));
SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
@@ -1579,7 +1595,7 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
ri->is_exit = exit_policy_is_general_exit(ri->exit_policy);
uptimes[n_active] = real_uptime(ri, now);
mtbfs[n_active] = rep_hist_get_stability(id, now);
- wfus [n_active] = rep_hist_get_weighted_fractional_uptime(id, now);
+ tks [n_active] = rep_hist_get_weighted_time_known(id, now);
bandwidths[n_active] = bw = router_get_advertised_bandwidth(ri);
total_bandwidth += bw;
if (ri->is_exit && !ri->is_bad_exit) {
@@ -1595,14 +1611,35 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
if (n_active) {
stable_uptime = median_uint32(uptimes, n_active);
stable_mtbf = median_double(mtbfs, n_active);
- guard_wfu = median_double(wfus, n_active);
fast_bandwidth = find_nth_uint32(bandwidths, n_active, n_active/8);
/* Now bandwidths is sorted. */
if (fast_bandwidth < ROUTER_REQUIRED_MIN_BANDWIDTH)
fast_bandwidth = bandwidths[n_active/4];
guard_bandwidth_including_exits = bandwidths[(n_active-1)/2];
+ guard_tk = find_nth_long(tks, n_active, n_active/8);
}
+ if (guard_tk > TIME_KNOWN_TO_GUARANTEE_FAMILIAR)
+ guard_tk = TIME_KNOWN_TO_GUARANTEE_FAMILIAR;
+
+ if (fast_bandwidth > BANDWIDTH_TO_GUARANTEE_FAST)
+ fast_bandwidth = BANDWIDTH_TO_GUARANTEE_FAST;
+
+ n_familiar = 0;
+ SMARTLIST_FOREACH(rl->routers, routerinfo_t *, ri, {
+ if (router_is_active(ri, now)) {
+ const char *id = ri->cache_info.identity_digest;
+ long tk = rep_hist_get_weighted_time_known(id, now);
+ if (tk < guard_tk)
+ continue;
+ wfus[n_familiar++] = rep_hist_get_weighted_fractional_uptime(id, now);
+ }
+ });
+ if (n_familiar)
+ guard_wfu = median_double(wfus, n_familiar);
+ if (guard_wfu > WFU_TO_GUARANTEE_GUARD)
+ guard_wfu = WFU_TO_GUARANTEE_GUARD;
+
enough_mtbf_info = rep_hist_have_measured_enough_stability();
if (n_active_nonexit) {
@@ -1610,19 +1647,25 @@ dirserv_compute_performance_thresholds(routerlist_t *rl)
median_uint32(bandwidths_excluding_exits, n_active_nonexit);
}
- /*XXXX020 Log the other stuff too. */
log(LOG_INFO, LD_DIRSERV,
- "Cutoffs: %lus uptime, %lu b/s fast, %lu or %lu b/s guard.",
+ "Cutoffs: For Stable, %lu sec uptime, %lu sec MTBF. "
+ "For Fast: %lu bytes/sec."
+ "For Guard: WFU %.03lf%%, time-known %lu sec, "
+ "and bandwidth %lu or %lu bytes/sec.",
(unsigned long)stable_uptime,
+ (unsigned long)stable_mtbf,
(unsigned long)fast_bandwidth,
+ guard_wfu*100,
+ (unsigned long)guard_tk,
(unsigned long)guard_bandwidth_including_exits,
(unsigned long)guard_bandwidth_excluding_exits);
tor_free(uptimes);
tor_free(mtbfs);
- tor_free(wfus);
tor_free(bandwidths);
tor_free(bandwidths_excluding_exits);
+ tor_free(tks);
+ tor_free(wfus);
}
/** Given a platform string as in a routerinfo_t (possibly null), return a
@@ -1852,9 +1895,11 @@ set_routerstatus_from_routerinfo(routerstatus_t *rs,
router_get_advertised_bandwidth(ri) >=
(exits_can_be_guards ? guard_bandwidth_including_exits :
guard_bandwidth_excluding_exits))) {
+ long tk = rep_hist_get_weighted_time_known(
+ ri->cache_info.identity_digest, now);
double wfu = rep_hist_get_weighted_fractional_uptime(
ri->cache_info.identity_digest, now);
- rs->is_possible_guard = (wfu >= guard_wfu) ? 1 : 0;
+ rs->is_possible_guard = (wfu >= guard_wfu && tk >= guard_tk) ? 1 : 0;
} else {
rs->is_possible_guard = 0;
}
diff --git a/src/or/or.h b/src/or/or.h
index 93eb639d67..7479ec9dc2 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3441,6 +3441,7 @@ int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
double rep_hist_get_stability(const char *id, time_t when);
double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when);
+long rep_hist_get_weighted_time_known(const char *id, time_t when);
int rep_hist_have_measured_enough_stability(void);
void rep_hist_note_used_port(uint16_t port, time_t now);
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 98d02d90d8..30c54fa473 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -384,8 +384,18 @@ get_stability(or_history_t *hist, time_t when)
return total / total_weights;
}
-/* Until we've known about you for this long, you simply can't be up. */
-#define MIN_WEIGHTED_TIME_TO_BE_UP (18*60*60)
+/** DOCDOC */
+static long
+get_total_weighted_time(or_history_t *hist, time_t when)
+{
+ long total = hist->total_weighted_time;
+ if (hist->start_of_run) {
+ total += (when - hist->start_of_run);
+ } else if (hist->start_of_downtime) {
+ total += (when - hist->start_of_downtime);
+ }
+ return total;
+}
/** Helper: Return the weighted percent-of-time-online of the router with
* history <b>hist</b>. */
@@ -402,8 +412,6 @@ get_weighted_fractional_uptime(or_history_t *hist, time_t when)
} else if (hist->start_of_downtime) {
total += (when - hist->start_of_downtime);
}
- if (total < MIN_WEIGHTED_TIME_TO_BE_UP)
- return 0.0;
return ((double) up) / total;
}
@@ -431,6 +439,22 @@ rep_hist_get_weighted_fractional_uptime(const char *id, time_t when)
return get_weighted_fractional_uptime(hist, when);
}
+/** Return a number representing how long we've known about the router whose
+ * digest is <b>id</b>. Return 0 if the router is unknown.
+ *
+ * Be careful: this measure increases monotonically as we know the router for
+ * longer and longer, but it doesn't increase linearly.
+ */
+long
+rep_hist_get_weighted_time_known(const char *id, time_t when)
+{
+ or_history_t *hist = get_or_history(id);
+ if (!hist)
+ return 0;
+
+ return get_total_weighted_time(hist, when);
+}
+
/** Return true if we've been measuring MTBFs for long enough to
* prounounce on Stability. */
int