diff options
Diffstat (limited to 'src/or/main.c')
-rw-r--r-- | src/or/main.c | 190 |
1 files changed, 120 insertions, 70 deletions
diff --git a/src/or/main.c b/src/or/main.c index 87b57f89e3..9dce158b33 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -163,11 +163,6 @@ token_bucket_rw_t global_bucket; /* Token bucket for relayed traffic. */ token_bucket_rw_t global_relayed_bucket; -/* DOCDOC stats_prev_n_read */ -static uint64_t stats_prev_n_read = 0; -/* DOCDOC stats_prev_n_written */ -static uint64_t stats_prev_n_written = 0; - /* XXX we might want to keep stats about global_relayed_*_bucket too. Or not.*/ /** How many bytes have we read since we started the process? */ static uint64_t stats_n_bytes_read = 0; @@ -1258,7 +1253,8 @@ run_connection_housekeeping(int i, time_t now) } else if (we_are_hibernating() && ! have_any_circuits && !connection_get_outbuf_len(conn)) { - /* We're hibernating, there's no circuits, and nothing to flush.*/ + /* We're hibernating or shutting down, there's no circuits, and nothing to + * flush.*/ log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) " "[Hibernating or exiting].", (int)conn->s,conn->address, conn->port); @@ -1497,7 +1493,7 @@ get_my_roles(const or_options_t *options) int roles = 0; int is_bridge = options->BridgeRelay; - int is_client = any_client_port_set(options); + int is_client = options_any_client_port_set(options); int is_relay = server_mode(options); int is_dirauth = authdir_mode_v3(options); int is_bridgeauth = authdir_mode_bridge(options); @@ -2500,10 +2496,100 @@ hs_service_callback(time_t now, const or_options_t *options) /** Timer: used to invoke second_elapsed_callback() once per second. */ static periodic_timer_t *second_timer = NULL; -/** Number of libevent errors in the last second: we die if we get too many. */ -static int n_libevent_errors = 0; -/** Last time that second_elapsed_callback was called. */ + +/** + * Enable or disable the per-second timer as appropriate, creating it if + * necessary. + */ +void +reschedule_per_second_timer(void) +{ + struct timeval one_second; + one_second.tv_sec = 1; + one_second.tv_usec = 0; + + if (! second_timer) { + second_timer = periodic_timer_new(tor_libevent_get_base(), + &one_second, + second_elapsed_callback, + NULL); + tor_assert(second_timer); + } + + const bool run_per_second_events = + control_any_per_second_event_enabled() || ! net_is_completely_disabled(); + + if (run_per_second_events) { + periodic_timer_launch(second_timer, &one_second); + } else { + periodic_timer_disable(second_timer); + } +} + +/** Last time that update_current_time was called. */ static time_t current_second = 0; +/** Last time that update_current_time updated current_second. */ +static monotime_coarse_t current_second_last_changed; + +/** + * Set the current time to "now", which should be the value returned by + * time(). Check for clock jumps and track the total number of seconds we + * have been running. + */ +void +update_current_time(time_t now) +{ + if (PREDICT_LIKELY(now == current_second)) { + /* We call this function a lot. Most frequently, the current second + * will not have changed, so we just return. */ + return; + } + + const time_t seconds_elapsed = current_second ? (now - current_second) : 0; + + /* Check the wall clock against the monotonic clock, so we can + * better tell idleness from clock jumps and/or other shenanigans. */ + monotime_coarse_t last_updated; + memcpy(&last_updated, ¤t_second_last_changed, sizeof(last_updated)); + monotime_coarse_get(¤t_second_last_changed); + + /** How much clock jumping do we tolerate? */ +#define NUM_JUMPED_SECONDS_BEFORE_WARN 100 + + /** How much idleness do we tolerate? */ +#define NUM_IDLE_SECONDS_BEFORE_WARN 3600 + + if (seconds_elapsed < -NUM_JUMPED_SECONDS_BEFORE_WARN) { + // moving back in time is always a bad sign. + circuit_note_clock_jumped(seconds_elapsed, false); + } else if (seconds_elapsed >= NUM_JUMPED_SECONDS_BEFORE_WARN) { + /* Compare the monotonic clock to the result of time(). */ + const int32_t monotime_msec_passed = + monotime_coarse_diff_msec32(&last_updated, + ¤t_second_last_changed); + const int monotime_sec_passed = monotime_msec_passed / 1000; + const int discrepancy = monotime_sec_passed - (int)seconds_elapsed; + /* If the monotonic clock deviates from time(NULL), we have a couple of + * possibilities. On some systems, this means we have been suspended or + * sleeping. Everywhere, it can mean that the wall-clock time has + * been changed -- for example, with settimeofday(). + * + * On the other hand, if the monotonic time matches with the wall-clock + * time, we've probably just been idle for a while, with no events firing. + * we tolerate much more of that. + */ + const bool clock_jumped = abs(discrepancy) > 2; + + if (clock_jumped || seconds_elapsed >= NUM_IDLE_SECONDS_BEFORE_WARN) { + circuit_note_clock_jumped(seconds_elapsed, ! clock_jumped); + } + } else if (seconds_elapsed > 0) { + stats_n_seconds_working += seconds_elapsed; + } + + update_approx_time(now); + current_second = now; +} /** Libevent callback: invoked once every second. */ static void @@ -2513,43 +2599,21 @@ second_elapsed_callback(periodic_timer_t *timer, void *arg) * could use Libevent's timers for this rather than checking the current * time against a bunch of timeouts every second. */ time_t now; - size_t bytes_written; - size_t bytes_read; - int seconds_elapsed; (void)timer; (void)arg; - n_libevent_errors = 0; - - /* log_notice(LD_GENERAL, "Tick."); */ now = time(NULL); - update_approx_time(now); - /* the second has rolled over. check more stuff. */ - seconds_elapsed = current_second ? (int)(now - current_second) : 0; - bytes_read = (size_t)(stats_n_bytes_read - stats_prev_n_read); - bytes_written = (size_t)(stats_n_bytes_written - stats_prev_n_written); - stats_prev_n_read = stats_n_bytes_read; - stats_prev_n_written = stats_n_bytes_written; - - control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written); - control_event_stream_bandwidth_used(); - control_event_conn_bandwidth_used(); - control_event_circ_bandwidth_used(); - control_event_circuit_cell_stats(); - -/** If more than this many seconds have elapsed, probably the clock - * jumped: doesn't count. */ -#define NUM_JUMPED_SECONDS_BEFORE_WARN 100 - if (seconds_elapsed < -NUM_JUMPED_SECONDS_BEFORE_WARN || - seconds_elapsed >= NUM_JUMPED_SECONDS_BEFORE_WARN) { - circuit_note_clock_jumped(seconds_elapsed); - } else if (seconds_elapsed > 0) - stats_n_seconds_working += seconds_elapsed; + /* We don't need to do this once-per-second any more: time-updating is + * only in this callback _because it is a callback_. It should be fine + * to disable this callback, and the time will still get updated. + */ + update_current_time(now); - run_scheduled_events(now); + /* Maybe some controller events are ready to fire */ + control_per_second_events(); - current_second = now; /* remember which second it is, for next time */ + run_scheduled_events(now); } #ifdef HAVE_SYSTEMD_209 @@ -2565,21 +2629,6 @@ systemd_watchdog_callback(periodic_timer_t *timer, void *arg) } #endif /* defined(HAVE_SYSTEMD_209) */ -#ifndef _WIN32 -/** Called when a possibly ignorable libevent error occurs; ensures that we - * don't get into an infinite loop by ignoring too many errors from - * libevent. */ -static int -got_libevent_error(void) -{ - if (++n_libevent_errors > 8) { - log_err(LD_NET, "Too many libevent errors in one second; dying"); - return -1; - } - return 0; -} -#endif /* !defined(_WIN32) */ - #define UPTIME_CUTOFF_FOR_NEW_BANDWIDTH_TEST (6*60*60) /** Called when our IP address seems to have changed. <b>at_interface</b> @@ -2842,17 +2891,7 @@ do_main_loop(void) } /* set up once-a-second callback. */ - if (! second_timer) { - struct timeval one_second; - one_second.tv_sec = 1; - one_second.tv_usec = 0; - - second_timer = periodic_timer_new(tor_libevent_get_base(), - &one_second, - second_elapsed_callback, - NULL); - tor_assert(second_timer); - } + reschedule_per_second_timer(); #ifdef HAVE_SYSTEMD_209 uint64_t watchdog_delay; @@ -2921,6 +2960,11 @@ do_main_loop(void) return run_main_loop_until_done(); } +#ifndef _WIN32 +/** Rate-limiter for EINVAL-type libevent warnings. */ +static ratelim_t libevent_error_ratelim = RATELIM_INIT(10); +#endif + /** * Run the main loop a single time. Return 0 for "exit"; -1 for "exit with * error", and 1 for "run this again." @@ -2986,9 +3030,12 @@ run_main_loop_once(void) return -1; #ifndef _WIN32 } else if (e == EINVAL) { - log_warn(LD_NET, "EINVAL from libevent: should you upgrade libevent?"); - if (got_libevent_error()) + log_fn_ratelim(&libevent_error_ratelim, LOG_WARN, LD_NET, + "EINVAL from libevent: should you upgrade libevent?"); + if (libevent_error_ratelim.n_calls_since_last_time > 8) { + log_err(LD_NET, "Too many libevent errors, too fast: dying"); return -1; + } #endif /* !defined(_WIN32) */ } else { tor_assert_nonfatal_once(! ERRNO_IS_EINPROGRESS(e)); @@ -3034,6 +3081,7 @@ signal_callback(evutil_socket_t fd, short events, void *arg) (void)fd; (void)events; + update_current_time(time(NULL)); process_signal(sig); } @@ -3623,6 +3671,8 @@ tor_free_all(int postfork) hs_free_all(); dos_free_all(); circuitmux_ewma_free_all(); + accounting_free_all(); + if (!postfork) { config_free_all(); or_state_free_all(); @@ -3656,7 +3706,6 @@ tor_free_all(int postfork) memset(&global_bucket, 0, sizeof(global_bucket)); memset(&global_relayed_bucket, 0, sizeof(global_relayed_bucket)); - stats_prev_n_read = stats_prev_n_written = 0; stats_n_bytes_read = stats_n_bytes_written = 0; time_of_process_start = 0; time_of_last_signewnym = 0; @@ -3670,8 +3719,9 @@ tor_free_all(int postfork) should_init_bridge_stats = 1; dns_honesty_first_time = 1; heartbeat_callback_first_time = 1; - n_libevent_errors = 0; current_second = 0; + memset(¤t_second_last_changed, 0, + sizeof(current_second_last_changed)); if (!postfork) { release_lockfile(); |