diff options
author | Nick Mathewson <nickm@torproject.org> | 2008-09-23 18:24:20 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2008-09-23 18:24:20 +0000 |
commit | f28fc83ea57a3964457996238daf2f1697685185 (patch) | |
tree | 8d58dc722156c05322725b830180fb34a12d9eeb | |
parent | 29abfab8bfdc14d4f2f725aee6755dfc2bd6036b (diff) | |
download | tor-f28fc83ea57a3964457996238daf2f1697685185.tar.gz tor-f28fc83ea57a3964457996238daf2f1697685185.zip |
More logging for mtbf/wfu calculations.
svn:r16941
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | src/common/log.c | 2 | ||||
-rw-r--r-- | src/common/log.h | 4 | ||||
-rw-r--r-- | src/or/rephist.c | 85 |
4 files changed, 70 insertions, 23 deletions
@@ -46,6 +46,8 @@ Changes in version 0.2.1.6-alpha - 2008-09-xx default (i.e. new default value for HidServDirectoryV2 is 1). - Serve the latest v3 networkstatus consensus via the control port. Use "getinfo dir/status-vote/current/consensus" to fetch it. + - Better logging about stability/reliability calculations on directory + servers. o Code simplifications and refactoring: - Revise the connection_new functions so that a more typesafe variant diff --git a/src/common/log.c b/src/common/log.c index 785b066522..15f8a1c3df 100644 --- a/src/common/log.c +++ b/src/common/log.c @@ -718,7 +718,7 @@ log_level_to_string(int level) static const char *domain_list[] = { "GENERAL", "CRYPTO", "NET", "CONFIG", "FS", "PROTOCOL", "MM", "HTTP", "APP", "CONTROL", "CIRC", "REND", "BUG", "DIR", "DIRSERV", - "OR", "EDGE", "ACCT", NULL + "OR", "EDGE", "ACCT", "HIST", NULL }; /** DOCDOC */ diff --git a/src/common/log.h b/src/common/log.h index 61bc503b38..4fd62e79e0 100644 --- a/src/common/log.h +++ b/src/common/log.h @@ -90,9 +90,11 @@ #define LD_EXIT LD_EDGE /** Bandwidth accounting. */ #define LD_ACCT (1u<<17) +/** Router history */ +#define LD_HIST (1u<<18) /** Number of logging domains in the code. */ -#define N_LOGGING_DOMAINS 18 +#define N_LOGGING_DOMAINS 19 typedef uint32_t log_domain_mask_t; diff --git a/src/or/rephist.c b/src/or/rephist.c index 093a716da9..18451bbedf 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -284,21 +284,41 @@ rep_hist_note_connection_died(const char* id, time_t when) hist->changed = when; } -/** We have just decided that this router is reachable, meaning - * we will give it a "Running" flag for the next while. */ +/** We have just decided that this router with identity digest <b>id</b> is + * reachable, meaning we will give it a "Running" flag for the next while. */ void rep_hist_note_router_reachable(const char *id, time_t when) { or_history_t *hist = get_or_history(id); + int was_in_run = 1; + char tbuf[ISO_TIME_LEN+1]; + if (!started_tracking_stability) started_tracking_stability = time(NULL); if (hist && !hist->start_of_run) { hist->start_of_run = when; + was_in_run = 0; } if (hist && hist->start_of_downtime) { - long down_length = when - hist->start_of_downtime; + long down_length; + + format_local_iso_time(tbuf, hist->start_of_downtime); + log_info(LD_HIST, "Router %s is now Running; it had been down since %s.", + hex_str(id, DIGEST_LEN), tbuf); + if (was_in_run) + log_info(LD_HIST, " (Paradoxically, it was already Running too.)"); + + down_length = when - hist->start_of_downtime; hist->total_weighted_time += down_length; hist->start_of_downtime = 0; + } else { + format_local_iso_time(tbuf, hist->start_of_run); + if (was_in_run) + log_debug(LD_HIST, "Router %s is still Running; it has been Running " + "since %s", hex_str(id, DIGEST_LEN), tbuf); + else + log_info(LD_HIST, "Router %s is now Running; it was previously untracked", + hex_str(id, DIGEST_LEN)); } } @@ -308,21 +328,41 @@ void rep_hist_note_router_unreachable(const char *id, time_t when) { or_history_t *hist = get_or_history(id); + char tbuf[ISO_TIME_LEN+1]; + int was_running = 0; if (!started_tracking_stability) started_tracking_stability = time(NULL); if (hist && hist->start_of_run) { /*XXXX We could treat failed connections differently from failed * connect attempts. */ long run_length = when - hist->start_of_run; + format_local_iso_time(tbuf, hist->start_of_run); + hist->weighted_run_length += run_length; hist->total_run_weights += 1.0; hist->start_of_run = 0; - hist->weighted_uptime += run_length; hist->total_weighted_time += run_length; + + was_running = 1; + log_info(LD_HIST, "Router %s is now non-Running: it had previously been " + "Running since %s. Its total weighted uptime is %lu/%lu.", + hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime, + hist->total_weighted_time); } if (hist && !hist->start_of_downtime) { hist->start_of_downtime = when; + + if (!was_running) + log_info(LD_HIST, "Router %s is now non-Running; it was previously " + "untracked.", hex_str(id, DIGEST_LEN)); + } else { + if (!was_running) { + format_local_iso_time(tbuf, hist->start_of_downtime); + + log_info(LD_HIST, "Router %s is still non-Running; it has been " + "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf); + } } } @@ -350,6 +390,9 @@ rep_hist_downrate_old_runs(time_t now) alpha *= STABILITY_ALPHA; } + log_info(LD_HIST, "Discounting all old stability info by a factor of %lf", + alpha); + /* Multiply every w_r_l, t_r_w pair by alpha. */ for (orhist_it = digestmap_iter_init(history_map); !digestmap_iter_done(orhist_it); @@ -526,7 +569,7 @@ rep_hist_dump_stats(time_t now, int severity) rep_history_clean(now - get_options()->RephistTrackTime); - log(severity, LD_GENERAL, "--------------- Dumping history information:"); + log(severity, LD_HIST, "--------------- Dumping history information:"); for (orhist_it = digestmap_iter_init(history_map); !digestmap_iter_done(orhist_it); @@ -551,7 +594,7 @@ rep_hist_dump_stats(time_t now, int severity) } else { uptime=1.0; } - log(severity, LD_GENERAL, + log(severity, LD_HIST, "OR %s [%s]: %ld/%ld good connections; uptime %ld/%ld sec (%.2f%%); " "wmtbf %lu:%02lu:%02lu", name1, hexdigest1, @@ -582,7 +625,7 @@ rep_hist_dump_stats(time_t now, int severity) else len += ret; } - log(severity, LD_GENERAL, "%s", buffer); + log(severity, LD_HIST, "%s", buffer); } } } @@ -815,7 +858,7 @@ rep_hist_load_mtbf_data(time_t now) } } if (format != 1 && format != 2) { - log_warn(LD_GENERAL, + log_warn(LD_HIST, "Unrecognized format in mtbf history file. Skipping."); goto err; } @@ -825,17 +868,17 @@ rep_hist_load_mtbf_data(time_t now) break; if (!strcmpstart(line, "last-downrated ")) { if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0) - log_warn(LD_GENERAL,"Couldn't parse downrate time in mtbf " + log_warn(LD_HIST,"Couldn't parse downrate time in mtbf " "history file."); } if (!strcmpstart(line, "stored-at ")) { if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0) - log_warn(LD_GENERAL,"Couldn't parse stored time in mtbf " + log_warn(LD_HIST,"Couldn't parse stored time in mtbf " "history file."); } if (!strcmpstart(line, "tracked-since ")) { if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0) - log_warn(LD_GENERAL,"Couldn't parse started-tracking time in mtbf " + log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf " "history file."); } } @@ -845,7 +888,7 @@ rep_hist_load_mtbf_data(time_t now) tracked_since = now; if (!stored_at) { - log_warn(LD_GENERAL, "No stored time recorded."); + log_warn(LD_HIST, "No stored time recorded."); goto err; } @@ -878,7 +921,7 @@ rep_hist_load_mtbf_data(time_t now) n = sscanf(line, "%40s %ld %lf S=%10s %8s", hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11); if (n != 3 && n != 5) { - log_warn(LD_GENERAL, "Couldn't scan line %s", escaped(line)); + log_warn(LD_HIST, "Couldn't scan line %s", escaped(line)); continue; } have_mtbf = 1; @@ -897,7 +940,7 @@ rep_hist_load_mtbf_data(time_t now) if (n == 2 || n == 4) { have_mtbf = 1; } else { - log_warn(LD_GENERAL, "Couldn't scan +MTBF line %s", + log_warn(LD_HIST, "Couldn't scan +MTBF line %s", escaped(mtbfline)); } } @@ -909,7 +952,7 @@ rep_hist_load_mtbf_data(time_t now) if (n == 2 || n == 4) { have_wfu = 1; } else { - log_warn(LD_GENERAL, "Couldn't scan +WFU line %s", escaped(wfuline)); + log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline)); } } if (wfu_idx > i) @@ -918,7 +961,7 @@ rep_hist_load_mtbf_data(time_t now) i = mtbf_idx; } if (base16_decode(digest, DIGEST_LEN, hexbuf, HEX_DIGEST_LEN) < 0) { - log_warn(LD_GENERAL, "Couldn't hex string %s", escaped(hexbuf)); + log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf)); continue; } hist = get_or_history(digest); @@ -929,7 +972,7 @@ rep_hist_load_mtbf_data(time_t now) if (mtbf_timebuf[0]) { mtbf_timebuf[10] = ' '; if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0) - log_warn(LD_GENERAL, "Couldn't parse time %s", + log_warn(LD_HIST, "Couldn't parse time %s", escaped(mtbf_timebuf)); } hist->start_of_run = correct_time(start_of_run, now, stored_at, @@ -944,7 +987,7 @@ rep_hist_load_mtbf_data(time_t now) if (wfu_timebuf[0]) { wfu_timebuf[10] = ' '; if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0) - log_warn(LD_GENERAL, "Couldn't parse time %s", escaped(wfu_timebuf)); + log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf)); } } hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at, @@ -953,7 +996,7 @@ rep_hist_load_mtbf_data(time_t now) hist->total_weighted_time = total_wt_time; } if (strcmp(line, ".")) - log_warn(LD_GENERAL, "Truncated MTBF file."); + log_warn(LD_HIST, "Truncated MTBF file."); if (tracked_since < 86400*365) /* Recover from insanely early value. */ tracked_since = latest_possible_start; @@ -1335,7 +1378,7 @@ rep_hist_load_state(or_state_t *state, char **err) v = tor_parse_uint64(cp, 10, 0, UINT64_MAX, &ok, NULL); if (!ok) { all_ok=0; - log_notice(LD_GENERAL, "Could not parse '%s' into a number.'", cp); + log_notice(LD_HIST, "Could not parse '%s' into a number.'", cp); } if (start < now) { add_obs(b, start, v); @@ -1617,7 +1660,7 @@ note_crypto_pk_op(pk_op_t operation) void dump_pk_ops(int severity) { - log(severity, LD_GENERAL, + log(severity, LD_HIST, "PK operations: %lu directory objects signed, " "%lu directory objects verified, " "%lu routerdescs signed, " |