diff options
Diffstat (limited to 'src/or/rephist.c')
-rw-r--r-- | src/or/rephist.c | 870 |
1 files changed, 652 insertions, 218 deletions
diff --git a/src/or/rephist.c b/src/or/rephist.c index f05186e319..ec5b84692e 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -1,5 +1,5 @@ /* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2011, The Tor Project, Inc. */ + * Copyright (c) 2007-2012, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -7,7 +7,7 @@ * \brief Basic history and "reputation" functionality to remember * which servers have worked in the past, how much bandwidth we've * been using, which ports we tend to want, and so on; further, - * exit port statistics and cell statistics. + * exit port statistics, cell statistics, and connection statistics. **/ #include "or.h" @@ -15,6 +15,7 @@ #include "circuituse.h" #include "config.h" #include "networkstatus.h" +#include "nodelist.h" #include "rephist.h" #include "router.h" #include "routerlist.h" @@ -643,7 +644,7 @@ rep_hist_dump_stats(time_t now, int severity) size_t len; int ret; unsigned long upt, downt; - routerinfo_t *r; + const node_t *node; rep_history_clean(now - get_options()->RephistTrackTime); @@ -657,8 +658,8 @@ rep_hist_dump_stats(time_t now, int severity) digestmap_iter_get(orhist_it, &digest1, &or_history_p); or_history = (or_history_t*) or_history_p; - if ((r = router_get_by_digest(digest1))) - name1 = r->nickname; + if ((node = node_get_by_id(digest1)) && node_get_nickname(node)) + name1 = node_get_nickname(node); else name1 = "(unknown)"; base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN); @@ -688,8 +689,8 @@ rep_hist_dump_stats(time_t now, int severity) lhist_it = digestmap_iter_next(or_history->link_history_map, lhist_it)) { digestmap_iter_get(lhist_it, &digest2, &link_history_p); - if ((r = router_get_by_digest(digest2))) - name2 = r->nickname; + if ((node = node_get_by_id(digest2)) && node_get_nickname(node)) + name2 = node_get_nickname(node); else name2 = "(unknown)"; @@ -823,7 +824,7 @@ rep_hist_record_mtbf_data(time_t now, int missing_means_down) base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN); if (missing_means_down && hist->start_of_run && - !router_get_by_digest(digest)) { + !router_get_by_id_digest(digest)) { /* We think this relay is running, but it's not listed in our * routerlist. Somehow it fell out without telling us it went * down. Complain and also correct it. */ @@ -929,7 +930,7 @@ rep_hist_get_router_stability_doc(time_t now) return NULL; tor_free(last_stability_doc); - chunks = smartlist_create(); + chunks = smartlist_new(); if (rep_hist_have_measured_enough_stability()) { smartlist_add(chunks, tor_strdup("we-have-enough-measurements\n")); @@ -938,33 +939,35 @@ rep_hist_get_router_stability_doc(time_t now) } DIGESTMAP_FOREACH(history_map, id, or_history_t *, hist) { - routerinfo_t *ri; + const node_t *node; char dbuf[BASE64_DIGEST_LEN+1]; - char header_buf[512]; char *info; digest_to_base64(dbuf, id); - ri = router_get_by_digest(id); - if (ri) { - char *ip = tor_dup_ip(ri->addr); + node = node_get_by_id(id); + if (node) { + char ip[INET_NTOA_BUF_LEN+1]; char tbuf[ISO_TIME_LEN+1]; - format_iso_time(tbuf, ri->cache_info.published_on); - tor_snprintf(header_buf, sizeof(header_buf), + time_t published = node_get_published_on(node); + node_get_address_string(node,ip,sizeof(ip)); + if (published > 0) + format_iso_time(tbuf, published); + else + strlcpy(tbuf, "???", sizeof(tbuf)); + smartlist_add_asprintf(chunks, "router %s %s %s\n" "published %s\n" "relevant-flags %s%s%s\n" "declared-uptime %ld\n", - dbuf, ri->nickname, ip, + dbuf, node_get_nickname(node), ip, tbuf, - ri->is_running ? "Running " : "", - ri->is_valid ? "Valid " : "", - ri->is_hibernating ? "Hibernating " : "", - ri->uptime); - tor_free(ip); + node->is_running ? "Running " : "", + node->is_valid ? "Valid " : "", + node->ri && node->ri->is_hibernating ? "Hibernating " : "", + node_get_declared_uptime(node)); } else { - tor_snprintf(header_buf, sizeof(header_buf), + smartlist_add_asprintf(chunks, "router %s {no descriptor}\n", dbuf); } - smartlist_add(chunks, tor_strdup(header_buf)); info = rep_hist_format_router_status(hist, now); if (info) smartlist_add(chunks, info); @@ -1058,7 +1061,7 @@ rep_hist_load_mtbf_data(time_t now) tor_free(filename); if (!d) return -1; - lines = smartlist_create(); + lines = smartlist_new(); smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0); tor_free(d); } @@ -1474,7 +1477,7 @@ rep_hist_fill_bandwidth_history(char *buf, size_t len, const bw_array_t *b) { char *cp = buf; int i, n; - or_options_t *options = get_options(); + const or_options_t *options = get_options(); uint64_t cutoff; if (b->num_maxes_set <= b->next_max_idx) { @@ -1583,7 +1586,6 @@ rep_hist_update_bwhist_state_section(or_state_t *state, time_t *s_begins, int *s_interval) { - char *cp; int i,j; uint64_t maxval; @@ -1607,31 +1609,31 @@ rep_hist_update_bwhist_state_section(or_state_t *state, } *s_begins = 0; *s_interval = 900; - *s_values = smartlist_create(); - *s_maxima = smartlist_create(); + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); return; } *s_begins = b->next_period; *s_interval = NUM_SECS_BW_SUM_INTERVAL; - *s_values = smartlist_create(); - *s_maxima = smartlist_create(); + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); /* Set i to first position in circular array */ i = (b->num_maxes_set <= b->next_max_idx) ? 0 : b->next_max_idx; for (j=0; j < b->num_maxes_set; ++j,++i) { if (i >= NUM_TOTALS) i = 0; - tor_asprintf(&cp, U64_FORMAT, U64_PRINTF_ARG(b->totals[i] & ~0x3ff)); - smartlist_add(*s_values, cp); + smartlist_add_asprintf(*s_values, U64_FORMAT, + U64_PRINTF_ARG(b->totals[i] & ~0x3ff)); maxval = b->maxima[i] / NUM_SECS_ROLLING_MEASURE; - tor_asprintf(&cp, U64_FORMAT, U64_PRINTF_ARG(maxval & ~0x3ff)); - smartlist_add(*s_maxima, cp); + smartlist_add_asprintf(*s_maxima, U64_FORMAT, + U64_PRINTF_ARG(maxval & ~0x3ff)); } - tor_asprintf(&cp, U64_FORMAT, U64_PRINTF_ARG(b->total_in_period & ~0x3ff)); - smartlist_add(*s_values, cp); + smartlist_add_asprintf(*s_values, U64_FORMAT, + U64_PRINTF_ARG(b->total_in_period & ~0x3ff)); maxval = b->max_total / NUM_SECS_ROLLING_MEASURE; - tor_asprintf(&cp, U64_FORMAT, U64_PRINTF_ARG(maxval & ~0x3ff)); - smartlist_add(*s_maxima, cp); + smartlist_add_asprintf(*s_maxima, U64_FORMAT, + U64_PRINTF_ARG(maxval & ~0x3ff)); } /** Update <b>state</b> with the newest bandwidth history. Done before @@ -1770,10 +1772,18 @@ rep_hist_load_state(or_state_t *state, char **err) /*********************************************************************/ +/** A single predicted port: used to remember which ports we've made + * connections to, so that we can try to keep making circuits that can handle + * those ports. */ +typedef struct predicted_port_t { + /** The port we connected to */ + uint16_t port; + /** The time at which we last used it */ + time_t time; +} predicted_port_t; + /** A list of port numbers that have been used recently. */ static smartlist_t *predicted_ports_list=NULL; -/** The corresponding most recently used time for each port. */ -static smartlist_t *predicted_ports_times=NULL; /** We just got an application request for a connection with * port <b>port</b>. Remember it for the future, so we can keep @@ -1782,14 +1792,11 @@ static smartlist_t *predicted_ports_times=NULL; static void add_predicted_port(time_t now, uint16_t port) { - /* XXXX we could just use uintptr_t here, I think. -NM */ - uint16_t *tmp_port = tor_malloc(sizeof(uint16_t)); - time_t *tmp_time = tor_malloc(sizeof(time_t)); - *tmp_port = port; - *tmp_time = now; - rephist_total_alloc += sizeof(uint16_t) + sizeof(time_t); - smartlist_add(predicted_ports_list, tmp_port); - smartlist_add(predicted_ports_times, tmp_time); + predicted_port_t *pp = tor_malloc(sizeof(predicted_port_t)); + pp->port = port; + pp->time = now; + rephist_total_alloc += sizeof(*pp); + smartlist_add(predicted_ports_list, pp); } /** Initialize whatever memory and structs are needed for predicting @@ -1799,8 +1806,7 @@ add_predicted_port(time_t now, uint16_t port) static void predicted_ports_init(void) { - predicted_ports_list = smartlist_create(); - predicted_ports_times = smartlist_create(); + predicted_ports_list = smartlist_new(); add_predicted_port(time(NULL), 80); /* add one to kickstart us */ } @@ -1810,12 +1816,11 @@ predicted_ports_init(void) static void predicted_ports_free(void) { - rephist_total_alloc -= smartlist_len(predicted_ports_list)*sizeof(uint16_t); - SMARTLIST_FOREACH(predicted_ports_list, char *, cp, tor_free(cp)); + rephist_total_alloc -= + smartlist_len(predicted_ports_list)*sizeof(predicted_port_t); + SMARTLIST_FOREACH(predicted_ports_list, predicted_port_t *, + pp, tor_free(pp)); smartlist_free(predicted_ports_list); - rephist_total_alloc -= smartlist_len(predicted_ports_times)*sizeof(time_t); - SMARTLIST_FOREACH(predicted_ports_times, char *, cp, tor_free(cp)); - smartlist_free(predicted_ports_times); } /** Remember that <b>port</b> has been asked for as of time <b>now</b>. @@ -1825,24 +1830,17 @@ predicted_ports_free(void) void rep_hist_note_used_port(time_t now, uint16_t port) { - int i; - uint16_t *tmp_port; - time_t *tmp_time; - tor_assert(predicted_ports_list); - tor_assert(predicted_ports_times); if (!port) /* record nothing */ return; - for (i = 0; i < smartlist_len(predicted_ports_list); ++i) { - tmp_port = smartlist_get(predicted_ports_list, i); - tmp_time = smartlist_get(predicted_ports_times, i); - if (*tmp_port == port) { - *tmp_time = now; + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->port == port) { + pp->time = now; return; } - } + } SMARTLIST_FOREACH_END(pp); /* it's not there yet; we need to add it */ add_predicted_port(now, port); } @@ -1851,36 +1849,48 @@ rep_hist_note_used_port(time_t now, uint16_t port) * we'll want to make connections to the same port in the future. */ #define PREDICTED_CIRCS_RELEVANCE_TIME (60*60) -/** Return a pointer to the list of port numbers that +/** Return a newly allocated pointer to a list of uint16_t * for ports that * are likely to be asked for in the near future. - * - * The caller promises not to mess with it. */ smartlist_t * rep_hist_get_predicted_ports(time_t now) { - int i; - uint16_t *tmp_port; - time_t *tmp_time; - + smartlist_t *out = smartlist_new(); tor_assert(predicted_ports_list); - tor_assert(predicted_ports_times); /* clean out obsolete entries */ - for (i = 0; i < smartlist_len(predicted_ports_list); ++i) { - tmp_time = smartlist_get(predicted_ports_times, i); - if (*tmp_time + PREDICTED_CIRCS_RELEVANCE_TIME < now) { - tmp_port = smartlist_get(predicted_ports_list, i); - log_debug(LD_CIRC, "Expiring predicted port %d", *tmp_port); - smartlist_del(predicted_ports_list, i); - smartlist_del(predicted_ports_times, i); - rephist_total_alloc -= sizeof(uint16_t)+sizeof(time_t); - tor_free(tmp_port); - tor_free(tmp_time); - i--; + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->time + PREDICTED_CIRCS_RELEVANCE_TIME < now) { + log_debug(LD_CIRC, "Expiring predicted port %d", pp->port); + + rephist_total_alloc -= sizeof(predicted_port_t); + tor_free(pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } else { + smartlist_add(out, tor_memdup(&pp->port, sizeof(uint16_t))); } - } - return predicted_ports_list; + } SMARTLIST_FOREACH_END(pp); + return out; +} + +/** + * Take a list of uint16_t *, and remove every port in the list from the + * current list of predicted ports. + */ +void +rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports) +{ + /* Let's do this on O(N), not O(N^2). */ + bitarray_t *remove_ports = bitarray_init_zero(UINT16_MAX); + SMARTLIST_FOREACH(rmv_ports, const uint16_t *, p, + bitarray_set(remove_ports, *p)); + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (bitarray_is_set(remove_ports, pp->port)) { + tor_free(pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } + } SMARTLIST_FOREACH_END(pp); + bitarray_free(remove_ports); } /** The user asked us to do a resolve. Rather than keeping track of @@ -2116,7 +2126,9 @@ rep_hist_exit_stats_term(void) tor_free(exit_streams); } -/** Helper for qsort: compare two ints. */ +/** Helper for qsort: compare two ints. Does not handle overflow properly, + * but works fine for sorting an array of port numbers, which is what we use + * it for. */ static int _compare_int(const void *x, const void *y) { @@ -2124,7 +2136,8 @@ _compare_int(const void *x, const void *y) } /** Return a newly allocated string containing the exit port statistics - * until <b>now</b>, or NULL if we're not collecting exit stats. */ + * until <b>now</b>, or NULL if we're not collecting exit stats. Caller + * must ensure start_of_exit_stats_interval is in the past. */ char * rep_hist_format_exit_stats(time_t now) { @@ -2134,7 +2147,6 @@ rep_hist_format_exit_stats(time_t now) uint64_t cur_bytes = 0, other_read = 0, other_written = 0, total_read = 0, total_written = 0; uint32_t total_streams = 0, other_streams = 0; - char *buf; smartlist_t *written_strings, *read_strings, *streams_strings; char *written_string, *read_string, *streams_string; char t[ISO_TIME_LEN+1]; @@ -2143,6 +2155,8 @@ rep_hist_format_exit_stats(time_t now) if (!start_of_exit_stats_interval) return NULL; /* Not initialized. */ + tor_assert(now >= start_of_exit_stats_interval); + /* Go through all ports to find the n ports that saw most written and * read bytes. * @@ -2195,9 +2209,9 @@ rep_hist_format_exit_stats(time_t now) } /* Add observations of top ports to smartlists. */ - written_strings = smartlist_create(); - read_strings = smartlist_create(); - streams_strings = smartlist_create(); + written_strings = smartlist_new(); + read_strings = smartlist_new(); + streams_strings = smartlist_new(); other_read = total_read; other_written = total_written; other_streams = total_streams; @@ -2211,9 +2225,8 @@ rep_hist_format_exit_stats(time_t now) exit_bytes_written[cur_port], EXIT_STATS_ROUND_UP_BYTES); num /= 1024; - buf = NULL; - tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num)); - smartlist_add(written_strings, buf); + smartlist_add_asprintf(written_strings, "%d="U64_FORMAT, + cur_port, U64_PRINTF_ARG(num)); other_written -= exit_bytes_written[cur_port]; } if (exit_bytes_read[cur_port] > 0) { @@ -2221,18 +2234,15 @@ rep_hist_format_exit_stats(time_t now) exit_bytes_read[cur_port], EXIT_STATS_ROUND_UP_BYTES); num /= 1024; - buf = NULL; - tor_asprintf(&buf, "%d="U64_FORMAT, cur_port, U64_PRINTF_ARG(num)); - smartlist_add(read_strings, buf); + smartlist_add_asprintf(read_strings, "%d="U64_FORMAT, + cur_port, U64_PRINTF_ARG(num)); other_read -= exit_bytes_read[cur_port]; } if (exit_streams[cur_port] > 0) { uint32_t num = round_uint32_to_next_multiple_of( exit_streams[cur_port], EXIT_STATS_ROUND_UP_STREAMS); - buf = NULL; - tor_asprintf(&buf, "%d=%u", cur_port, num); - smartlist_add(streams_strings, buf); + smartlist_add_asprintf(streams_strings, "%d=%u", cur_port, num); other_streams -= exit_streams[cur_port]; } } @@ -2241,20 +2251,16 @@ rep_hist_format_exit_stats(time_t now) other_written = round_uint64_to_next_multiple_of(other_written, EXIT_STATS_ROUND_UP_BYTES); other_written /= 1024; - buf = NULL; - tor_asprintf(&buf, "other="U64_FORMAT, U64_PRINTF_ARG(other_written)); - smartlist_add(written_strings, buf); + smartlist_add_asprintf(written_strings, "other="U64_FORMAT, + U64_PRINTF_ARG(other_written)); other_read = round_uint64_to_next_multiple_of(other_read, EXIT_STATS_ROUND_UP_BYTES); other_read /= 1024; - buf = NULL; - tor_asprintf(&buf, "other="U64_FORMAT, U64_PRINTF_ARG(other_read)); - smartlist_add(read_strings, buf); + smartlist_add_asprintf(read_strings, "other="U64_FORMAT, + U64_PRINTF_ARG(other_read)); other_streams = round_uint32_to_next_multiple_of(other_streams, EXIT_STATS_ROUND_UP_STREAMS); - buf = NULL; - tor_asprintf(&buf, "other=%u", other_streams); - smartlist_add(streams_strings, buf); + smartlist_add_asprintf(streams_strings, "other=%u", other_streams); /* Join all observations in single strings. */ written_string = smartlist_join_strings(written_strings, ",", 0, NULL); @@ -2374,41 +2380,60 @@ typedef struct circ_buffer_stats_t { /** List of circ_buffer_stats_t. */ static smartlist_t *circuits_for_buffer_stats = NULL; +/** Remember cell statistics <b>mean_num_cells_in_queue</b>, + * <b>mean_time_cells_in_queue</b>, and <b>processed_cells</b> of a + * circuit. */ +void +rep_hist_add_buffer_stats(double mean_num_cells_in_queue, + double mean_time_cells_in_queue, uint32_t processed_cells) +{ + circ_buffer_stats_t *stat; + if (!start_of_buffer_stats_interval) + return; /* Not initialized. */ + stat = tor_malloc_zero(sizeof(circ_buffer_stats_t)); + stat->mean_num_cells_in_queue = mean_num_cells_in_queue; + stat->mean_time_cells_in_queue = mean_time_cells_in_queue; + stat->processed_cells = processed_cells; + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + smartlist_add(circuits_for_buffer_stats, stat); +} + /** Remember cell statistics for circuit <b>circ</b> at time * <b>end_of_interval</b> and reset cell counters in case the circuit * remains open in the next measurement interval. */ void rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval) { - circ_buffer_stats_t *stat; time_t start_of_interval; int interval_length; or_circuit_t *orcirc; + double mean_num_cells_in_queue, mean_time_cells_in_queue; + uint32_t processed_cells; if (CIRCUIT_IS_ORIGIN(circ)) return; orcirc = TO_OR_CIRCUIT(circ); if (!orcirc->processed_cells) return; - if (!circuits_for_buffer_stats) - circuits_for_buffer_stats = smartlist_create(); - start_of_interval = circ->timestamp_created.tv_sec > - start_of_buffer_stats_interval ? + start_of_interval = (circ->timestamp_created.tv_sec > + start_of_buffer_stats_interval) ? circ->timestamp_created.tv_sec : start_of_buffer_stats_interval; interval_length = (int) (end_of_interval - start_of_interval); if (interval_length <= 0) return; - stat = tor_malloc_zero(sizeof(circ_buffer_stats_t)); - stat->processed_cells = orcirc->processed_cells; + processed_cells = orcirc->processed_cells; /* 1000.0 for s -> ms; 2.0 because of app-ward and exit-ward queues */ - stat->mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time / + mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time / (double) interval_length / 1000.0 / 2.0; - stat->mean_time_cells_in_queue = + mean_time_cells_in_queue = (double) orcirc->total_cell_waiting_time / (double) orcirc->processed_cells; - smartlist_add(circuits_for_buffer_stats, stat); orcirc->total_cell_waiting_time = 0; orcirc->processed_cells = 0; + rep_hist_add_buffer_stats(mean_num_cells_in_queue, + mean_time_cells_in_queue, + processed_cells); } /** Sorting helper: return -1, 1, or 0 based on comparison of two @@ -2430,136 +2455,541 @@ _buffer_stats_compare_entries(const void **_a, const void **_b) void rep_hist_buffer_stats_term(void) { - start_of_buffer_stats_interval = 0; + rep_hist_reset_buffer_stats(0); +} + +/** Clear history of circuit statistics and set the measurement interval + * start to <b>now</b>. */ +void +rep_hist_reset_buffer_stats(time_t now) +{ if (!circuits_for_buffer_stats) - circuits_for_buffer_stats = smartlist_create(); + circuits_for_buffer_stats = smartlist_new(); SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, stat, tor_free(stat)); smartlist_clear(circuits_for_buffer_stats); + start_of_buffer_stats_interval = now; } -/** Write buffer statistics to $DATADIR/stats/buffer-stats and return when - * we would next want to write exit stats. */ -time_t -rep_hist_buffer_stats_write(time_t now) +/** Return a newly allocated string containing the buffer statistics until + * <b>now</b>, or NULL if we're not collecting buffer stats. Caller must + * ensure start_of_buffer_stats_interval is in the past. */ +char * +rep_hist_format_buffer_stats(time_t now) { - char *statsdir = NULL, *filename = NULL; - char written[ISO_TIME_LEN+1]; - open_file_t *open_file = NULL; - FILE *out; #define SHARES 10 int processed_cells[SHARES], circs_in_share[SHARES], number_of_circuits, i; double queued_cells[SHARES], time_in_queue[SHARES]; - smartlist_t *str_build = NULL; - char *str = NULL, *buf = NULL; - circuit_t *circ; + smartlist_t *processed_cells_strings, *queued_cells_strings, + *time_in_queue_strings; + char *processed_cells_string, *queued_cells_string, + *time_in_queue_string; + char t[ISO_TIME_LEN+1]; + char *result; if (!start_of_buffer_stats_interval) - return 0; /* Not initialized. */ - if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now) - goto done; /* Not ready to write */ + return NULL; /* Not initialized. */ - str_build = smartlist_create(); + tor_assert(now >= start_of_buffer_stats_interval); - /* add current circuits to stats */ - for (circ = _circuit_get_global_list(); circ; circ = circ->next) - rep_hist_buffer_stats_add_circ(circ, now); - /* calculate deciles */ + /* Calculate deciles if we saw at least one circuit. */ memset(processed_cells, 0, SHARES * sizeof(int)); memset(circs_in_share, 0, SHARES * sizeof(int)); memset(queued_cells, 0, SHARES * sizeof(double)); memset(time_in_queue, 0, SHARES * sizeof(double)); if (!circuits_for_buffer_stats) - circuits_for_buffer_stats = smartlist_create(); - smartlist_sort(circuits_for_buffer_stats, - _buffer_stats_compare_entries); + circuits_for_buffer_stats = smartlist_new(); number_of_circuits = smartlist_len(circuits_for_buffer_stats); - if (number_of_circuits < 1) { - log_info(LD_HIST, "Attempt to write cell statistics to disk failed. " - "We haven't seen a single circuit to report about."); - goto done; + if (number_of_circuits > 0) { + smartlist_sort(circuits_for_buffer_stats, + _buffer_stats_compare_entries); + i = 0; + SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats, + circ_buffer_stats_t *, stat) + { + int share = i++ * SHARES / number_of_circuits; + processed_cells[share] += stat->processed_cells; + queued_cells[share] += stat->mean_num_cells_in_queue; + time_in_queue[share] += stat->mean_time_cells_in_queue; + circs_in_share[share]++; + } + SMARTLIST_FOREACH_END(stat); } - i = 0; - SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats, - circ_buffer_stats_t *, stat) - { - int share = i++ * SHARES / number_of_circuits; - processed_cells[share] += stat->processed_cells; - queued_cells[share] += stat->mean_num_cells_in_queue; - time_in_queue[share] += stat->mean_time_cells_in_queue; - circs_in_share[share]++; - } - SMARTLIST_FOREACH_END(stat); - /* clear buffer stats history */ - SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, - stat, tor_free(stat)); - smartlist_clear(circuits_for_buffer_stats); - /* write to file */ - statsdir = get_datadir_fname("stats"); - if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) - goto done; - filename = get_datadir_fname2("stats", "buffer-stats"); - out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND | O_TEXT, - 0600, &open_file); - if (!out) - goto done; - format_iso_time(written, now); - if (fprintf(out, "cell-stats-end %s (%d s)\n", written, - (unsigned) (now - start_of_buffer_stats_interval)) < 0) - goto done; + + /* Write deciles to strings. */ + processed_cells_strings = smartlist_new(); + queued_cells_strings = smartlist_new(); + time_in_queue_strings = smartlist_new(); for (i = 0; i < SHARES; i++) { - tor_asprintf(&buf,"%d", !circs_in_share[i] ? 0 : - processed_cells[i] / circs_in_share[i]); - smartlist_add(str_build, buf); + smartlist_add_asprintf(processed_cells_strings, + "%d", !circs_in_share[i] ? 0 : + processed_cells[i] / circs_in_share[i]); } - str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "cell-processed-cells %s\n", str) < 0) - goto done; - tor_free(str); - SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); - smartlist_clear(str_build); for (i = 0; i < SHARES; i++) { - tor_asprintf(&buf, "%.2f", circs_in_share[i] == 0 ? 0.0 : - queued_cells[i] / (double) circs_in_share[i]); - smartlist_add(str_build, buf); + smartlist_add_asprintf(queued_cells_strings, "%.2f", + circs_in_share[i] == 0 ? 0.0 : + queued_cells[i] / (double) circs_in_share[i]); } - str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "cell-queued-cells %s\n", str) < 0) - goto done; - tor_free(str); - SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); - smartlist_clear(str_build); for (i = 0; i < SHARES; i++) { - tor_asprintf(&buf, "%.0f", circs_in_share[i] == 0 ? 0.0 : - time_in_queue[i] / (double) circs_in_share[i]); - smartlist_add(str_build, buf); + smartlist_add_asprintf(time_in_queue_strings, "%.0f", + circs_in_share[i] == 0 ? 0.0 : + time_in_queue[i] / (double) circs_in_share[i]); } - str = smartlist_join_strings(str_build, ",", 0, NULL); - if (fprintf(out, "cell-time-in-queue %s\n", str) < 0) - goto done; - tor_free(str); - SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); - smartlist_free(str_build); - str_build = NULL; - if (fprintf(out, "cell-circuits-per-decile %d\n", - (number_of_circuits + SHARES - 1) / SHARES) < 0) + + /* Join all observations in single strings. */ + processed_cells_string = smartlist_join_strings(processed_cells_strings, + ",", 0, NULL); + queued_cells_string = smartlist_join_strings(queued_cells_strings, + ",", 0, NULL); + time_in_queue_string = smartlist_join_strings(time_in_queue_strings, + ",", 0, NULL); + SMARTLIST_FOREACH(processed_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(queued_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(time_in_queue_strings, char *, cp, tor_free(cp)); + smartlist_free(processed_cells_strings); + smartlist_free(queued_cells_strings); + smartlist_free(time_in_queue_strings); + + /* Put everything together. */ + format_iso_time(t, now); + tor_asprintf(&result, "cell-stats-end %s (%d s)\n" + "cell-processed-cells %s\n" + "cell-queued-cells %s\n" + "cell-time-in-queue %s\n" + "cell-circuits-per-decile %d\n", + t, (unsigned) (now - start_of_buffer_stats_interval), + processed_cells_string, + queued_cells_string, + time_in_queue_string, + (number_of_circuits + SHARES - 1) / SHARES); + tor_free(processed_cells_string); + tor_free(queued_cells_string); + tor_free(time_in_queue_string); + return result; +#undef SHARES +} + +/** If 24 hours have passed since the beginning of the current buffer + * stats period, write buffer stats to $DATADIR/stats/buffer-stats + * (possibly overwriting an existing file) and reset counters. Return + * when we would next want to write buffer stats or 0 if we never want to + * write. */ +time_t +rep_hist_buffer_stats_write(time_t now) +{ + circuit_t *circ; + char *statsdir = NULL, *filename = NULL, *str = NULL; + + if (!start_of_buffer_stats_interval) + return 0; /* Not initialized. */ + if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Add open circuits to the history. */ + for (circ = _circuit_get_global_list(); circ; circ = circ->next) { + rep_hist_buffer_stats_add_circ(circ, now); + } + + /* Generate history string. */ + str = rep_hist_format_buffer_stats(now); + + /* Reset both buffer history and counters of open circuits. */ + rep_hist_reset_buffer_stats(now); + + /* Try to write to disk. */ + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) { + log_warn(LD_HIST, "Unable to create stats/ directory!"); goto done; - finish_writing_to_file(open_file); - open_file = NULL; - start_of_buffer_stats_interval = now; + } + filename = get_datadir_fname2("stats", "buffer-stats"); + if (write_str_to_file(filename, str, 0) < 0) + log_warn(LD_HIST, "Unable to write buffer stats to disk!"); + done: - if (open_file) - abort_writing_to_file(open_file); + tor_free(str); tor_free(filename); tor_free(statsdir); - if (str_build) { - SMARTLIST_FOREACH(str_build, char *, c, tor_free(c)); - smartlist_free(str_build); + return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL; +} + +/*** Descriptor serving statistics ***/ + +/** Digestmap to track which descriptors were downloaded this stats + * collection interval. It maps descriptor digest to pointers to 1, + * effectively turning this into a list. */ +static digestmap_t *served_descs = NULL; + +/** Number of how many descriptors were downloaded in total during this + * interval. */ +static unsigned long total_descriptor_downloads; + +/** Start time of served descs stats or 0 if we're not collecting those. */ +static time_t start_of_served_descs_stats_interval; + +/** Initialize descriptor stats. */ +void +rep_hist_desc_stats_init(time_t now) +{ + if (served_descs) { + log_warn(LD_BUG, "Called rep_hist_desc_stats_init() when desc stats were " + "already initialized. This is probably harmless."); + return; // Already initialized + } + served_descs = digestmap_new(); + total_descriptor_downloads = 0; + start_of_served_descs_stats_interval = now; +} + +/** Reset served descs stats to empty, starting a new interval <b>now</b>. */ +static void +rep_hist_reset_desc_stats(time_t now) +{ + rep_hist_desc_stats_term(); + rep_hist_desc_stats_init(now); +} + +/** Stop collecting served descs stats, so that rep_hist_desc_stats_init() is + * safe to be called again. */ +void +rep_hist_desc_stats_term(void) +{ + digestmap_free(served_descs, NULL); + served_descs = NULL; + start_of_served_descs_stats_interval = 0; + total_descriptor_downloads = 0; +} + +/** Helper for rep_hist_desc_stats_write(). Return a newly allocated string + * containing the served desc statistics until now, or NULL if we're not + * collecting served desc stats. Caller must ensure that now is not before + * start_of_served_descs_stats_interval. */ +static char * +rep_hist_format_desc_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *result; + + digestmap_iter_t *iter; + const char *key; + void *val; + unsigned size; + int *vals, max = 0, q3 = 0, md = 0, q1 = 0, min = 0; + int n = 0; + + if (!start_of_served_descs_stats_interval) + return NULL; + + size = digestmap_size(served_descs); + if (size > 0) { + vals = tor_malloc(size * sizeof(int)); + for (iter = digestmap_iter_init(served_descs); + !digestmap_iter_done(iter); + iter = digestmap_iter_next(served_descs, iter)) { + uintptr_t count; + digestmap_iter_get(iter, &key, &val); + count = (uintptr_t)val; + vals[n++] = (int)count; + (void)key; + } + max = find_nth_int(vals, size, size-1); + q3 = find_nth_int(vals, size, (3*size-1)/4); + md = find_nth_int(vals, size, (size-1)/2); + q1 = find_nth_int(vals, size, (size-1)/4); + min = find_nth_int(vals, size, 0); + tor_free(vals); + } + + format_iso_time(t, now); + + tor_asprintf(&result, + "served-descs-stats-end %s (%d s) total=%lu unique=%u " + "max=%d q3=%d md=%d q1=%d min=%d\n", + t, + (unsigned) (now - start_of_served_descs_stats_interval), + total_descriptor_downloads, + size, max, q3, md, q1, min); + + return result; +} + +/** If WRITE_STATS_INTERVAL seconds have passed since the beginning of + * the current served desc stats interval, write the stats to + * $DATADIR/stats/served-desc-stats (possibly appending to an existing file) + * and reset the state for the next interval. Return when we would next want + * to write served desc stats or 0 if we won't want to write. */ +time_t +rep_hist_desc_stats_write(time_t now) +{ + char *statsdir = NULL, *filename = NULL, *str = NULL; + + if (!start_of_served_descs_stats_interval) + return 0; /* We're not collecting stats. */ + if (start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL > now) + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; + + str = rep_hist_format_desc_stats(now); + tor_assert(str != NULL); + + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) { + log_warn(LD_HIST, "Unable to create stats/ directory!"); + goto done; + } + filename = get_datadir_fname2("stats", "served-desc-stats"); + if (append_bytes_to_file(filename, str, strlen(str), 0) < 0) + log_warn(LD_HIST, "Unable to write served descs statistics to disk!"); + + rep_hist_reset_desc_stats(now); + + done: + tor_free(statsdir); + tor_free(filename); + tor_free(str); + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; +} + +/* DOCDOC rep_hist_note_desc_served */ +void +rep_hist_note_desc_served(const char * desc) +{ + void *val; + uintptr_t count; + if (!served_descs) + return; // We're not collecting stats + val = digestmap_get(served_descs, desc); + count = (uintptr_t)val; + if (count != INT_MAX) + ++count; + digestmap_set(served_descs, desc, (void*)count); + total_descriptor_downloads++; +} + +/*** Connection statistics ***/ + +/** Start of the current connection stats interval or 0 if we're not + * collecting connection statistics. */ +static time_t start_of_conn_stats_interval; + +/** Initialize connection stats. */ +void +rep_hist_conn_stats_init(time_t now) +{ + start_of_conn_stats_interval = now; +} + +/* Count connections that we read and wrote less than these many bytes + * from/to as below threshold. */ +#define BIDI_THRESHOLD 20480 + +/* Count connections that we read or wrote at least this factor as many + * bytes from/to than we wrote or read to/from as mostly reading or + * writing. */ +#define BIDI_FACTOR 10 + +/* Interval length in seconds for considering read and written bytes for + * connection stats. */ +#define BIDI_INTERVAL 10 + +/** Start of next BIDI_INTERVAL second interval. */ +static time_t bidi_next_interval = 0; + +/** Number of connections that we read and wrote less than BIDI_THRESHOLD + * bytes from/to in BIDI_INTERVAL seconds. */ +static uint32_t below_threshold = 0; + +/** Number of connections that we read at least BIDI_FACTOR times more + * bytes from than we wrote to in BIDI_INTERVAL seconds. */ +static uint32_t mostly_read = 0; + +/** Number of connections that we wrote at least BIDI_FACTOR times more + * bytes to than we read from in BIDI_INTERVAL seconds. */ +static uint32_t mostly_written = 0; + +/** Number of connections that we read and wrote at least BIDI_THRESHOLD + * bytes from/to, but not BIDI_FACTOR times more in either direction in + * BIDI_INTERVAL seconds. */ +static uint32_t both_read_and_written = 0; + +/** Entry in a map from connection ID to the number of read and written + * bytes on this connection in a BIDI_INTERVAL second interval. */ +typedef struct bidi_map_entry_t { + HT_ENTRY(bidi_map_entry_t) node; + uint64_t conn_id; /**< Connection ID */ + size_t read; /**< Number of read bytes */ + size_t written; /**< Number of written bytes */ +} bidi_map_entry_t; + +/** Map of OR connections together with the number of read and written + * bytes in the current BIDI_INTERVAL second interval. */ +static HT_HEAD(bidimap, bidi_map_entry_t) bidi_map = + HT_INITIALIZER(); + +static int +bidi_map_ent_eq(const bidi_map_entry_t *a, const bidi_map_entry_t *b) +{ + return a->conn_id == b->conn_id; +} + +/* DOCDOC bidi_map_ent_hash */ +static unsigned +bidi_map_ent_hash(const bidi_map_entry_t *entry) +{ + return (unsigned) entry->conn_id; +} + +HT_PROTOTYPE(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq); +HT_GENERATE(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq, 0.6, malloc, realloc, free); + +/* DOCDOC bidi_map_free */ +static void +bidi_map_free(void) +{ + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); } + HT_CLEAR(bidimap, &bidi_map); +} + +/** Reset counters for conn statistics. */ +void +rep_hist_reset_conn_stats(time_t now) +{ + start_of_conn_stats_interval = now; + below_threshold = 0; + mostly_read = 0; + mostly_written = 0; + both_read_and_written = 0; + bidi_map_free(); +} + +/** Stop collecting connection stats in a way that we can re-start doing + * so in rep_hist_conn_stats_init(). */ +void +rep_hist_conn_stats_term(void) +{ + rep_hist_reset_conn_stats(0); +} + +/** We read <b>num_read</b> bytes and wrote <b>num_written</b> from/to OR + * connection <b>conn_id</b> in second <b>when</b>. If this is the first + * observation in a new interval, sum up the last observations. Add bytes + * for this connection. */ +void +rep_hist_note_or_conn_bytes(uint64_t conn_id, size_t num_read, + size_t num_written, time_t when) +{ + if (!start_of_conn_stats_interval) + return; + /* Initialize */ + if (bidi_next_interval == 0) + bidi_next_interval = when + BIDI_INTERVAL; + /* Sum up last period's statistics */ + if (when >= bidi_next_interval) { + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + if (ent->read + ent->written < BIDI_THRESHOLD) + below_threshold++; + else if (ent->read >= ent->written * BIDI_FACTOR) + mostly_read++; + else if (ent->written >= ent->read * BIDI_FACTOR) + mostly_written++; + else + both_read_and_written++; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); + } + while (when >= bidi_next_interval) + bidi_next_interval += BIDI_INTERVAL; + log_info(LD_GENERAL, "%d below threshold, %d mostly read, " + "%d mostly written, %d both read and written.", + below_threshold, mostly_read, mostly_written, + both_read_and_written); + } + /* Add this connection's bytes. */ + if (num_read > 0 || num_written > 0) { + bidi_map_entry_t *entry, lookup; + lookup.conn_id = conn_id; + entry = HT_FIND(bidimap, &bidi_map, &lookup); + if (entry) { + entry->written += num_written; + entry->read += num_read; + } else { + entry = tor_malloc_zero(sizeof(bidi_map_entry_t)); + entry->conn_id = conn_id; + entry->written = num_written; + entry->read = num_read; + HT_INSERT(bidimap, &bidi_map, entry); + } + } +} + +/** Return a newly allocated string containing the connection statistics + * until <b>now</b>, or NULL if we're not collecting conn stats. Caller must + * ensure start_of_conn_stats_interval is in the past. */ +char * +rep_hist_format_conn_stats(time_t now) +{ + char *result, written[ISO_TIME_LEN+1]; + + if (!start_of_conn_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_conn_stats_interval); + + format_iso_time(written, now); + tor_asprintf(&result, "conn-bi-direct %s (%d s) %d,%d,%d,%d\n", + written, + (unsigned) (now - start_of_conn_stats_interval), + below_threshold, + mostly_read, + mostly_written, + both_read_and_written); + return result; +} + +/** If 24 hours have passed since the beginning of the current conn stats + * period, write conn stats to $DATADIR/stats/conn-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write conn stats or 0 if we never want to write. */ +time_t +rep_hist_conn_stats_write(time_t now) +{ + char *statsdir = NULL, *filename = NULL, *str = NULL; + + if (!start_of_conn_stats_interval) + return 0; /* Not initialized. */ + if (start_of_conn_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Generate history string. */ + str = rep_hist_format_conn_stats(now); + + /* Reset counters. */ + rep_hist_reset_conn_stats(now); + + /* Try to write to disk. */ + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE, get_options()->User) < 0) { + log_warn(LD_HIST, "Unable to create stats/ directory!"); + goto done; + } + filename = get_datadir_fname2("stats", "conn-stats"); + if (write_str_to_file(filename, str, 0) < 0) + log_warn(LD_HIST, "Unable to write conn stats to disk!"); + + done: tor_free(str); -#undef SHARES - return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL; + tor_free(filename); + tor_free(statsdir); + return start_of_conn_stats_interval + WRITE_STATS_INTERVAL; } /** Free all storage held by the OR/link history caches, by the @@ -2576,11 +3006,15 @@ rep_hist_free_all(void) tor_free(exit_streams); built_last_stability_doc_at = 0; predicted_ports_free(); + bidi_map_free(); + if (circuits_for_buffer_stats) { SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, s, tor_free(s)); smartlist_free(circuits_for_buffer_stats); circuits_for_buffer_stats = NULL; } + rep_hist_desc_stats_term(); + total_descriptor_downloads = 0; } |