From 0a5ecb334298187a64f58382231245111130aa76 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 9 Mar 2021 15:36:40 +0200 Subject: Implement backbone of overload statistics. - Implement overload statistics structure. - Implement function that keeps track of overload statistics. - Implement function that writes overload statistics to descriptor. - Unittest for the whole logic. --- src/feature/stats/rephist.c | 116 ++++++++++++++++++++++++++++++++++++++++++++ src/feature/stats/rephist.h | 18 +++++++ 2 files changed, 134 insertions(+) (limited to 'src/feature/stats') diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index f7c9336bff..64bec000df 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -183,6 +183,122 @@ static time_t started_tracking_stability = 0; /** Map from hex OR identity digest to or_history_t. */ static digestmap_t *history_map = NULL; +/** Represents a state of overload stats. + * + * All the timestamps in this structure have already been rounded down to the + * nearest hour. */ +typedef struct { + /* When did we last experience a general overload? */ + time_t overload_general_time; + + /* When did we last experience a bandwidth-related overload? */ + time_t overload_ratelimits_time; + /* How many times have we gone off the our read limits? */ + uint64_t overload_read_count; + /* How many times have we gone off the our write limits? */ + uint64_t overload_write_count; + + /* When did we last experience a file descriptor exhaustion? */ + time_t overload_fd_exhausted_time; + /* How many times have we experienced a file descriptor exhaustion? */ + uint64_t overload_fd_exhausted; +} overload_stats_t; + +/** Current state of overload stats */ +static overload_stats_t overload_stats; + +/** Return true if this overload happened within the last `n_hours`. */ +static bool +overload_happened_recently(time_t overload_time, unsigned n_hours) +{ + /* An overload is relevant if it happened in the last 72 hours */ + if (overload_time > approx_time() - 3600 * n_hours) { + return true; + } + return false; +} + +/* The current version of the overload stats version */ +#define OVERLOAD_STATS_VERSION 1 + +/** Returns an allocated string for extra-info documents for publishing + * overload statistics. */ +char * +rep_hist_get_overload_stats_lines(void) +{ + char *result = NULL; + smartlist_t *chunks = smartlist_new(); + char tbuf[ISO_TIME_LEN+1]; + + /* First encode the general overload */ + if (overload_happened_recently(overload_stats.overload_general_time, 72)) { + format_iso_time(tbuf, overload_stats.overload_general_time); + smartlist_add_asprintf(chunks, "overload-general %d %s", + OVERLOAD_STATS_VERSION, tbuf); + } + + /* Now do bandwidth-related overloads */ + if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) { + const or_options_t *options = get_options(); + format_iso_time(tbuf, overload_stats.overload_ratelimits_time); + smartlist_add_asprintf(chunks, + "overload-ratelimits %d %s %" PRIu64 " %" PRIu64 + " %" PRIu64 " %" PRIu64, + OVERLOAD_STATS_VERSION, tbuf, + options->BandwidthRate, options->BandwidthBurst, + overload_stats.overload_read_count, + overload_stats.overload_write_count); + } + + /* Finally file descriptor overloads */ + if (overload_happened_recently( + overload_stats.overload_fd_exhausted_time, 72)) { + format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time); + smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s", + OVERLOAD_STATS_VERSION, tbuf); + } + + /* Bail early if we had nothing to write */ + if (smartlist_len(chunks) == 0) { + goto done; + } + + result = smartlist_join_strings(chunks, "\n", 0, NULL); + + done: + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + return result; +} + +/** Round down the time in `a` to the beginning of the current hour */ +#define SET_TO_START_OF_HOUR(a) STMT_BEGIN \ + (a) = approx_time() - (approx_time() % 3600); \ +STMT_END + +/** Note down an overload event of type `overload`. */ +void +rep_hist_note_overload(overload_type_t overload) +{ + switch (overload) { + case OVERLOAD_GENERAL: + SET_TO_START_OF_HOUR(overload_stats.overload_general_time); + break; + case OVERLOAD_READ: + SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); + overload_stats.overload_read_count++; + break; + case OVERLOAD_WRITE: + SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); + overload_stats.overload_write_count++; + break; + case OVERLOAD_FD_EXHAUSTED: + SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time); + overload_stats.overload_fd_exhausted++; + break; + } +} + /** Return the or_history_t for the OR with identity digest id, * creating it if necessary. */ static or_history_t * diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h index 7ecbfebfd7..45020d7967 100644 --- a/src/feature/stats/rephist.h +++ b/src/feature/stats/rephist.h @@ -140,6 +140,24 @@ void rep_hist_reset_padding_counts(void); void rep_hist_prep_published_padding_counts(time_t now); void rep_hist_padding_count_timers(uint64_t num_timers); +/** + * Represents the various types of overload we keep track of and expose in our + * extra-info descriptor. +*/ +typedef enum { + /* A general overload -- can have many different causes. */ + OVERLOAD_GENERAL, + /* We went over our configured read rate/burst bandwidth limit */ + OVERLOAD_READ, + /* We went over our configured write rate/burst bandwidth limit */ + OVERLOAD_WRITE, + /* We exhausted the file descriptors in this system */ + OVERLOAD_FD_EXHAUSTED, +} overload_type_t; + +void rep_hist_note_overload(overload_type_t overload); +char *rep_hist_get_overload_stats_lines(void); + #ifdef TOR_UNIT_TESTS struct hs_v2_stats_t; const struct hs_v2_stats_t *rep_hist_get_hs_v2_stats(void); -- cgit v1.2.3-54-g00ecf From 7740a8b5d4de649e3ba2a0578f789140725974b6 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Wed, 10 Mar 2021 14:37:36 +0200 Subject: Rate-limit counter should increase once per minute. --- src/feature/stats/rephist.c | 19 +++++++++++++++---- src/test/test_stats.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) (limited to 'src/feature/stats') diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c index 64bec000df..2c3f362e45 100644 --- a/src/feature/stats/rephist.c +++ b/src/feature/stats/rephist.c @@ -280,18 +280,29 @@ STMT_END void rep_hist_note_overload(overload_type_t overload) { + static time_t last_read_counted = 0; + static time_t last_write_counted = 0; + switch (overload) { case OVERLOAD_GENERAL: SET_TO_START_OF_HOUR(overload_stats.overload_general_time); break; - case OVERLOAD_READ: + case OVERLOAD_READ: { SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); - overload_stats.overload_read_count++; + if (approx_time() >= last_read_counted + 60) { /* Count once a minute */ + overload_stats.overload_read_count++; + last_read_counted = approx_time(); + } break; - case OVERLOAD_WRITE: + } + case OVERLOAD_WRITE: { SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time); - overload_stats.overload_write_count++; + if (approx_time() >= last_write_counted + 60) { /* Count once a minute */ + overload_stats.overload_write_count++; + last_write_counted = approx_time(); + } break; + } case OVERLOAD_FD_EXHAUSTED: SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time); overload_stats.overload_fd_exhausted++; diff --git a/src/test/test_stats.c b/src/test/test_stats.c index 1f14255e31..a22a42f723 100644 --- a/src/test/test_stats.c +++ b/src/test/test_stats.c @@ -807,6 +807,45 @@ test_overload_stats(void *arg) stats_str = rep_hist_get_overload_stats_lines(); tt_assert(!stats_str); + /* Now test the rate-limit rate-limiter ;) */ + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + } + /* We already have an event registered from the previous tests. We just + * registered ten more overload events, but only one should have been counted + * because of the rate limiter */ + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 2 0", + OP_EQ, stats_str); + tor_free(stats_str); + + /* Increment time by 59 secs and try again. No additional events should + register */ + current_time += 59; + update_approx_time(current_time); + + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + } + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 2 0", + OP_EQ, stats_str); + tor_free(stats_str); + + /* Now increment time by 2 secs -- taking it after the minute rate limiting + and see that events will register again */ + current_time += 2; + update_approx_time(current_time); + + for (int i = 0; i < 10; i++) { + rep_hist_note_overload(OVERLOAD_READ); + rep_hist_note_overload(OVERLOAD_WRITE); + } + stats_str = rep_hist_get_overload_stats_lines(); + tt_str_op("overload-ratelimits 1 2002-01-10 02:00:00 1000 2000 3 1", + OP_EQ, stats_str); + tor_free(stats_str); + done: tor_free(stats_str); } -- cgit v1.2.3-54-g00ecf