aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog12
-rw-r--r--src/or/dirserv.c6
-rw-r--r--src/or/eventdns.c2
-rw-r--r--src/or/main.c9
-rw-r--r--src/or/or.h5
-rw-r--r--src/or/rephist.c20
-rw-r--r--src/or/routerlist.c41
7 files changed, 56 insertions, 39 deletions
diff --git a/ChangeLog b/ChangeLog
index 88b2343059..a754d16464 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,9 +40,21 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
Changes in version 0.2.1.17-?? - 2009-??-??
+ o Major bugfixes:
+ - Directory authorities were neglecting to mark relays down in their
+ internal histories if the relays fell off the routerlist without
+ ever being found unreachable. So there were relays in the histories
+ that hadn't been seen for eight months, and were listed as being
+ up for eight months. This wreaked havoc on the "median wfu"
+ and "median mtbf" calculations, in turn making Guard and Stable
+ flags very wrong, hurting network performance. Fixes bugs 696 and
+ 969. Bugfix on 0.2.0.6-alpha.
+
o Minor bugfixes:
- Serve the DirPortFrontPage page even when we have been approaching
our quotas recently. Fixes bug 1013; bugfix on 0.2.1.8-alpha.
+ - Do not cap bandwidths reported by directory authorities; they are
+ already adjusted to reflect reality.
o Major features:
- Clients now use the bandwidth values in the consensus, rather than
diff --git a/src/or/dirserv.c b/src/or/dirserv.c
index 76ac3978f8..f355fdf03e 100644
--- a/src/or/dirserv.c
+++ b/src/or/dirserv.c
@@ -797,7 +797,7 @@ directory_remove_invalid(void)
if (r & FP_REJECT) {
log_info(LD_DIRSERV, "Router '%s' is now rejected: %s",
ent->nickname, msg?msg:"");
- routerlist_remove(rl, ent, 0);
+ routerlist_remove(rl, ent, 0, time(NULL));
i--;
changed = 1;
continue;
@@ -951,8 +951,8 @@ dirserv_set_router_is_running(routerinfo_t *router, time_t now)
answer = get_options()->AssumeReachable ||
now < router->last_reachable + REACHABLE_TIMEOUT;
- if (router->is_running && !answer) {
- /* it was running but now it's not. tell rephist. */
+ if (!answer) {
+ /* not considered reachable. tell rephist. */
rep_hist_note_router_unreachable(router->cache_info.identity_digest, now);
}
diff --git a/src/or/eventdns.c b/src/or/eventdns.c
index 00e63b5212..9578b24cae 100644
--- a/src/or/eventdns.c
+++ b/src/or/eventdns.c
@@ -1792,7 +1792,7 @@ evdns_server_request_format_response(struct server_request *req, int err)
if (j > 512) {
overflow:
j = 512;
- buf[3] |= 0x02; /* set the truncated bit. */
+ buf[2] |= 0x02; /* set the truncated bit. */
}
req->response_len = (size_t)j;
diff --git a/src/or/main.c b/src/or/main.c
index c3dae2ad54..97957a5791 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -925,7 +925,7 @@ run_scheduled_events(time_t now)
time_to_downrate_stability = rep_hist_downrate_old_runs(now);
if (authdir_mode_tests_reachability(options)) {
if (time_to_save_stability < now) {
- if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
+ if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
log_warn(LD_GENERAL, "Couldn't store mtbf data.");
}
#define SAVE_STABILITY_INTERVAL (30*60)
@@ -1970,14 +1970,15 @@ tor_cleanup(void)
/* Remove our pid file. We don't care if there was an error when we
* unlink, nothing we could do about it anyways. */
if (options->command == CMD_RUN_TOR) {
+ time_t now = time(NULL);
if (options->PidFile)
unlink(options->PidFile);
if (accounting_is_enabled(options))
- accounting_record_bandwidth_usage(time(NULL), get_or_state());
+ accounting_record_bandwidth_usage(now, get_or_state());
or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
- or_state_save(time(NULL));
+ or_state_save(now);
if (authdir_mode_tests_reachability(options))
- rep_hist_record_mtbf_data();
+ rep_hist_record_mtbf_data(now, 0);
}
#ifdef USE_DMALLOC
dmalloc_log_stats();
diff --git a/src/or/or.h b/src/or/or.h
index b6ee72d9b5..091264a4ef 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3969,7 +3969,7 @@ void rep_history_clean(time_t before);
void rep_hist_note_router_reachable(const char *id, time_t when);
void rep_hist_note_router_unreachable(const char *id, time_t when);
-int rep_hist_record_mtbf_data(void);
+int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
int rep_hist_load_mtbf_data(time_t now);
time_t rep_hist_downrate_old_runs(time_t now);
@@ -4405,7 +4405,8 @@ void routerinfo_free(routerinfo_t *router);
void extrainfo_free(extrainfo_t *extrainfo);
void routerlist_free(routerlist_t *rl);
void dump_routerlist_mem_usage(int severity);
-void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old);
+void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old,
+ time_t now);
void routerlist_free_all(void);
void routerlist_reset_warnings(void);
void router_set_status(const char *digest, int up);
diff --git a/src/or/rephist.c b/src/or/rephist.c
index 11e040c945..13fdb58b5e 100644
--- a/src/or/rephist.c
+++ b/src/or/rephist.c
@@ -683,9 +683,13 @@ rep_history_clean(time_t before)
}
}
-/** Write MTBF data to disk. Returns 0 on success, negative on failure. */
+/** Write MTBF data to disk. Return 0 on success, negative on failure.
+ *
+ * If <b>missing_means_down</b>, then if we're about to write an entry
+ * that is still considered up but isn't in our routerlist, consider it
+ * to be down. */
int
-rep_hist_record_mtbf_data(void)
+rep_hist_record_mtbf_data(time_t now, int missing_means_down)
{
char time_buf[ISO_TIME_LEN+1];
@@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
hist = (or_history_t*) or_history_p;
base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
+
+ if (missing_means_down && hist->start_of_run &&
+ !router_get_by_digest(digest)) {
+ /* We think this relay is running, but it's not listed in our
+ * routerlist. Somehow it fell out without telling us it went
+ * down. Complain and also correct it. */
+ log_info(LD_HIST,
+ "Relay '%s' is listed as up in rephist, but it's not in "
+ "our routerlist. Correcting.", dbuf);
+ rep_hist_note_router_unreachable(digest, now);
+ }
+
PRINTF((f, "R %s\n", dbuf));
if (hist->start_of_run > 0) {
format_iso_time(time_buf, hist->start_of_run);
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 1419ae4665..42b385b101 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -1523,15 +1523,12 @@ router_get_advertised_bandwidth_capped(routerinfo_t *router)
return result;
}
-/** Eventually, the number we return will come from the directory
- * consensus, so clients can dynamically update to better numbers.
- *
- * But for now, or in case there is no consensus available, just return
- * a sufficient default. */
-static uint32_t
-get_max_believable_bandwidth(void)
+/** Return bw*1000, unless bw*1000 would overflow, in which case return
+ * INT32_MAX. */
+static INLINE int32_t
+kb_to_bytes(uint32_t bw)
{
- return DEFAULT_MAX_BELIEVABLE_BANDWIDTH;
+ return (bw > (INT32_MAX/1000)) ? INT32_MAX : bw*1000;
}
/** Helper function:
@@ -1568,7 +1565,6 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
int n_unknown = 0;
bitarray_t *exit_bits;
bitarray_t *guard_bits;
- uint32_t max_believable_bw = get_max_believable_bandwidth();
int me_idx = -1;
/* Can't choose exit and guard at same time */
@@ -1598,7 +1594,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
is_exit = status->is_exit;
is_guard = status->is_possible_guard;
if (status->has_bandwidth) {
- this_bw = status->bandwidth*1000;
+ this_bw = kb_to_bytes(status->bandwidth);
} else { /* guess */
/* XXX022 once consensuses always list bandwidths, we can take
* this guessing business out. -RD */
@@ -1617,7 +1613,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
is_exit = router->is_exit;
is_guard = router->is_possible_guard;
if (rs && rs->has_bandwidth) {
- this_bw = rs->bandwidth*1000;
+ this_bw = kb_to_bytes(rs->bandwidth);
} else if (rs) { /* guess; don't trust the descriptor */
/* XXX022 once consensuses always list bandwidths, we can take
* this guessing business out. -RD */
@@ -1626,27 +1622,15 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
flags |= is_exit ? 2 : 0;
flags |= is_guard ? 4 : 0;
} else /* bridge or other descriptor not in our consensus */
- this_bw = router_get_advertised_bandwidth(router);
+ this_bw = router_get_advertised_bandwidth_capped(router);
}
if (is_exit)
bitarray_set(exit_bits, i);
if (is_guard)
bitarray_set(guard_bits, i);
- /* if they claim something huge, don't believe it */
- if (this_bw > max_believable_bw) {
- char fp[HEX_DIGEST_LEN+1];
- base16_encode(fp, sizeof(fp), statuses ?
- status->identity_digest :
- router->cache_info.identity_digest,
- DIGEST_LEN);
- log_fn(LOG_PROTOCOL_WARN, LD_DIR,
- "Bandwidth %d for router %s (%s) exceeds allowed max %d, capping",
- this_bw, router ? router->nickname : "(null)",
- fp, max_believable_bw);
- this_bw = max_believable_bw;
- }
if (is_known) {
bandwidths[i] = (int32_t) this_bw; // safe since MAX_BELIEVABLE<INT32_MAX
+ tor_assert(bandwidths[i] >= 0);
if (is_guard)
total_guard_bw += this_bw;
else
@@ -2645,7 +2629,7 @@ routerlist_insert_old(routerlist_t *rl, routerinfo_t *ri)
* If <b>make_old</b> is true, instead of deleting the router, we try adding
* it to rl-&gt;old_routers. */
void
-routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
+routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old, time_t now)
{
routerinfo_t *ri_tmp;
extrainfo_t *ei_tmp;
@@ -2653,6 +2637,9 @@ routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
tor_assert(0 <= idx && idx < smartlist_len(rl->routers));
tor_assert(smartlist_get(rl->routers, idx) == ri);
+ /* make sure the rephist module knows that it's not running */
+ rep_hist_note_router_unreachable(ri->cache_info.identity_digest, now);
+
ri->cache_info.routerlist_index = -1;
smartlist_del(rl->routers, idx);
if (idx < smartlist_len(rl->routers)) {
@@ -3344,7 +3331,7 @@ routerlist_remove_old_routers(void)
log_info(LD_DIR,
"Forgetting obsolete (too old) routerinfo for router '%s'",
router->nickname);
- routerlist_remove(routerlist, router, 1);
+ routerlist_remove(routerlist, router, 1, now);
i--;
}
}