diff options
author | Nick Mathewson <nickm@torproject.org> | 2016-08-25 14:29:06 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2016-08-25 14:29:06 -0400 |
commit | 1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2 (patch) | |
tree | b53afb27b4ab30c2913ac5e4fba69e8f319fdbc7 /src/or | |
parent | 20136a8207f0cda0d23093a884caae0358f98bac (diff) | |
parent | 9a09513c0bd54abae3317be4136cf2578e11fbf3 (diff) | |
download | tor-1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2.tar.gz tor-1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2.zip |
Merge remote-tracking branch 'andrea/ticket18640_v3'
Diffstat (limited to 'src/or')
-rw-r--r-- | src/or/config.c | 30 | ||||
-rw-r--r-- | src/or/connection.c | 304 | ||||
-rw-r--r-- | src/or/connection.h | 23 | ||||
-rw-r--r-- | src/or/connection_or.c | 4 | ||||
-rw-r--r-- | src/or/connection_or.h | 2 | ||||
-rw-r--r-- | src/or/main.c | 21 | ||||
-rw-r--r-- | src/or/main.h | 4 | ||||
-rw-r--r-- | src/or/or.h | 7 |
8 files changed, 383 insertions, 12 deletions
diff --git a/src/or/config.c b/src/or/config.c index 541025de16..31bf81877d 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -211,6 +211,7 @@ static config_var_t option_vars_[] = { V(CountPrivateBandwidth, BOOL, "0"), V(DataDirectory, FILENAME, NULL), V(DataDirectoryGroupReadable, BOOL, "0"), + V(DisableOOSCheck, BOOL, "1"), V(DisableNetwork, BOOL, "0"), V(DirAllowPrivateAddresses, BOOL, "0"), V(TestingAuthDirTimeToLearnReachability, INTERVAL, "30 minutes"), @@ -1374,6 +1375,35 @@ options_act_reversible(const or_options_t *old_options, char **msg) connection_mark_for_close(conn); } }); + + if (set_conn_limit) { + /* + * If we adjusted the conn limit, recompute the OOS threshold too + * + * How many possible sockets to keep in reserve? If we have lots of + * possible sockets, keep this below a limit and set ConnLimit_high_thresh + * very close to ConnLimit_, but if ConnLimit_ is low, shrink it in + * proportion. + * + * Somewhat arbitrarily, set socks_in_reserve to 5% of ConnLimit_, but + * cap it at 64. + */ + int socks_in_reserve = options->ConnLimit_ / 20; + if (socks_in_reserve > 64) socks_in_reserve = 64; + + options->ConnLimit_high_thresh = options->ConnLimit_ - socks_in_reserve; + options->ConnLimit_low_thresh = (options->ConnLimit_ / 4) * 3; + log_info(LD_GENERAL, + "Recomputed OOS thresholds: ConnLimit %d, ConnLimit_ %d, " + "ConnLimit_high_thresh %d, ConnLimit_low_thresh %d", + options->ConnLimit, options->ConnLimit_, + options->ConnLimit_high_thresh, + options->ConnLimit_low_thresh); + + /* Give the OOS handler a chance with the new thresholds */ + connection_check_oos(get_n_open_sockets(), 0); + } + goto done; rollback: diff --git a/src/or/connection.c b/src/or/connection.c index 68e442df54..9b583f4243 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -754,9 +754,9 @@ connection_mark_for_close_(connection_t *conn, int line, const char *file) * For all other cases, use connection_mark_and_flush() instead, which * checks for or_connection_t properly, instead. See below. */ -void -connection_mark_for_close_internal_(connection_t *conn, - int line, const char *file) +MOCK_IMPL(void, +connection_mark_for_close_internal_, (connection_t *conn, + int line, const char *file)) { assert_connection_ok(conn,0); tor_assert(line); @@ -1090,6 +1090,7 @@ connection_listener_new(const struct sockaddr *listensockaddr, int start_reading = 0; static int global_next_session_group = SESSION_GROUP_FIRST_AUTO; tor_addr_t addr; + int exhaustion = 0; if (listensockaddr->sa_family == AF_INET || listensockaddr->sa_family == AF_INET6) { @@ -1108,6 +1109,11 @@ connection_listener_new(const struct sockaddr *listensockaddr, int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* + * We'll call the OOS handler at the error exit, so set the + * exhaustion flag for it. + */ + exhaustion = 1; } else { log_warn(LD_NET, "Socket creation failed: %s", tor_socket_strerror(e)); @@ -1226,6 +1232,11 @@ connection_listener_new(const struct sockaddr *listensockaddr, int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* + * We'll call the OOS handler at the error exit, so set the + * exhaustion flag for it. + */ + exhaustion = 1; } else { log_warn(LD_NET,"Socket creation failed: %s.", strerror(e)); } @@ -1344,6 +1355,12 @@ connection_listener_new(const struct sockaddr *listensockaddr, dnsserv_configure_listener(conn); } + /* + * Normal exit; call the OOS handler since connection count just changed; + * the exhaustion flag will always be zero here though. + */ + connection_check_oos(get_n_open_sockets(), 0); + return conn; err: @@ -1352,6 +1369,9 @@ connection_listener_new(const struct sockaddr *listensockaddr, if (conn) connection_free(conn); + /* Call the OOS handler, indicate if we saw an exhaustion-related error */ + connection_check_oos(get_n_open_sockets(), exhaustion); + return NULL; } @@ -1442,21 +1462,34 @@ connection_handle_listener_read(connection_t *conn, int new_type) if (!SOCKET_OK(news)) { /* accept() error */ int e = tor_socket_errno(conn->s); if (ERRNO_IS_ACCEPT_EAGAIN(e)) { - return 0; /* they hung up before we could accept(). that's fine. */ + /* + * they hung up before we could accept(). that's fine. + * + * give the OOS handler a chance to run though + */ + connection_check_oos(get_n_open_sockets(), 0); + return 0; } else if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* Exhaustion; tell the OOS handler */ + connection_check_oos(get_n_open_sockets(), 1); return 0; } /* else there was a real error. */ log_warn(LD_NET,"accept() failed: %s. Closing listener.", tor_socket_strerror(e)); connection_mark_for_close(conn); + /* Tell the OOS handler about this too */ + connection_check_oos(get_n_open_sockets(), 0); return -1; } log_debug(LD_NET, "Connection accepted on socket %d (child of fd %d).", (int)news,(int)conn->s); + /* We accepted a new conn; run OOS handler */ + connection_check_oos(get_n_open_sockets(), 0); + if (make_socket_reuseable(news) < 0) { if (tor_socket_errno(news) == EINVAL) { /* This can happen on OSX if we get a badly timed shutdown. */ @@ -1661,12 +1694,18 @@ connection_connect_sockaddr,(connection_t *conn, s = tor_open_socket_nonblocking(protocol_family, SOCK_STREAM, proto); if (! SOCKET_OK(s)) { + /* + * Early OOS handler calls; it matters if it's an exhaustion-related + * error or not. + */ *socket_error = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(*socket_error)) { warn_too_many_conns(); + connection_check_oos(get_n_open_sockets(), 1); } else { log_warn(LD_NET,"Error creating network socket: %s", tor_socket_strerror(*socket_error)); + connection_check_oos(get_n_open_sockets(), 0); } return -1; } @@ -1676,6 +1715,13 @@ connection_connect_sockaddr,(connection_t *conn, tor_socket_strerror(errno)); } + /* + * We've got the socket open; give the OOS handler a chance to check + * against configuured maximum socket number, but tell it no exhaustion + * failure. + */ + connection_check_oos(get_n_open_sockets(), 0); + if (bindaddr && bind(s, bindaddr, bindaddr_len) < 0) { *socket_error = tor_socket_errno(s); log_warn(LD_NET,"Error binding network socket: %s", @@ -4454,6 +4500,256 @@ connection_reached_eof(connection_t *conn) } } +/** Comparator for the two-orconn case in OOS victim sort */ +static int +oos_victim_comparator_for_orconns(or_connection_t *a, or_connection_t *b) +{ + int a_circs, b_circs; + /* Fewer circuits == higher priority for OOS kill, sort earlier */ + + a_circs = connection_or_get_num_circuits(a); + b_circs = connection_or_get_num_circuits(b); + + if (a_circs < b_circs) return -1; + else if (b_circs > a_circs) return 1; + else return 0; +} + +/** Sort comparator for OOS victims; better targets sort before worse + * ones. */ +static int +oos_victim_comparator(const void **a_v, const void **b_v) +{ + connection_t *a = NULL, *b = NULL; + + /* Get connection pointers out */ + + a = (connection_t *)(*a_v); + b = (connection_t *)(*b_v); + + tor_assert(a != NULL); + tor_assert(b != NULL); + + /* + * We always prefer orconns as victims currently; we won't even see + * these non-orconn cases, but if we do, sort them after orconns. + */ + if (a->type == CONN_TYPE_OR && b->type == CONN_TYPE_OR) { + return oos_victim_comparator_for_orconns(TO_OR_CONN(a), TO_OR_CONN(b)); + } else { + /* + * One isn't an orconn; if one is, it goes first. We currently have no + * opinions about cases where neither is an orconn. + */ + if (a->type == CONN_TYPE_OR) return -1; + else if (b->type == CONN_TYPE_OR) return 1; + else return 0; + } +} + +/** Pick n victim connections for the OOS handler and return them in a + * smartlist. + */ +MOCK_IMPL(STATIC smartlist_t *, +pick_oos_victims, (int n)) +{ + smartlist_t *eligible = NULL, *victims = NULL; + smartlist_t *conns; + int conn_counts_by_type[CONN_TYPE_MAX_ + 1], i; + + /* + * Big damn assumption (someone improve this someday!): + * + * Socket exhaustion normally happens on high-volume relays, and so + * most of the connections involved are orconns. We should pick victims + * by assembling a list of all orconns, and sorting them in order of + * how much 'damage' by some metric we'd be doing by dropping them. + * + * If we move on from orconns, we should probably think about incoming + * directory connections next, or exit connections. Things we should + * probably never kill are controller connections and listeners. + * + * This function will count how many connections of different types + * exist and log it for purposes of gathering data on typical OOS + * situations to guide future improvements. + */ + + /* First, get the connection array */ + conns = get_connection_array(); + /* + * Iterate it and pick out eligible connection types, and log some stats + * along the way. + */ + eligible = smartlist_new(); + memset(conn_counts_by_type, 0, sizeof(conn_counts_by_type)); + SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) { + /* Bump the counter */ + tor_assert(c->type <= CONN_TYPE_MAX_); + ++(conn_counts_by_type[c->type]); + + /* Skip anything without a socket we can free */ + if (!(SOCKET_OK(c->s))) { + continue; + } + + /* Skip anything we would count as moribund */ + if (connection_is_moribund(c)) { + continue; + } + + switch (c->type) { + case CONN_TYPE_OR: + /* We've got an orconn, it's eligible to be OOSed */ + smartlist_add(eligible, c); + break; + default: + /* We don't know what to do with it, ignore it */ + break; + } + } SMARTLIST_FOREACH_END(c); + + /* Log some stats */ + if (smartlist_len(conns) > 0) { + /* At least one counter must be non-zero */ + log_info(LD_NET, "Some stats on conn types seen during OOS follow"); + for (i = CONN_TYPE_MIN_; i <= CONN_TYPE_MAX_; ++i) { + /* Did we see any? */ + if (conn_counts_by_type[i] > 0) { + log_info(LD_NET, "%s: %d conns", + conn_type_to_string(i), + conn_counts_by_type[i]); + } + } + log_info(LD_NET, "Done with OOS conn type stats"); + } + + /* Did we find more eligible targets than we want to kill? */ + if (smartlist_len(eligible) > n) { + /* Sort the list in order of target preference */ + smartlist_sort(eligible, oos_victim_comparator); + /* Pick first n as victims */ + victims = smartlist_new(); + for (i = 0; i < n; ++i) { + smartlist_add(victims, smartlist_get(eligible, i)); + } + /* Free the original list */ + smartlist_free(eligible); + } else { + /* No, we can just call them all victims */ + victims = eligible; + } + + return victims; +} + +/** Kill a list of connections for the OOS handler. */ +MOCK_IMPL(STATIC void, +kill_conn_list_for_oos, (smartlist_t *conns)) +{ + if (!conns) return; + + SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) { + /* Make sure the channel layer gets told about orconns */ + if (c->type == CONN_TYPE_OR) { + connection_or_close_for_error(TO_OR_CONN(c), 1); + } else { + connection_mark_for_close(c); + } + } SMARTLIST_FOREACH_END(c); + + log_notice(LD_NET, + "OOS handler marked %d connections", + smartlist_len(conns)); +} + +/** Out-of-Sockets handler; n_socks is the current number of open + * sockets, and failed is non-zero if a socket exhaustion related + * error immediately preceded this call. This is where to do + * circuit-killing heuristics as needed. + */ +void +connection_check_oos(int n_socks, int failed) +{ + int target_n_socks = 0, moribund_socks, socks_to_kill; + smartlist_t *conns; + + /* Early exit: is OOS checking disabled? */ + if (get_options()->DisableOOSCheck) { + return; + } + + /* Sanity-check args */ + tor_assert(n_socks >= 0); + + /* + * Make some log noise; keep it at debug level since this gets a chance + * to run on every connection attempt. + */ + log_debug(LD_NET, + "Running the OOS handler (%d open sockets, %s)", + n_socks, (failed != 0) ? "exhaustion seen" : "no exhaustion"); + + /* + * Check if we're really handling an OOS condition, and if so decide how + * many sockets we want to get down to. Be sure we check if the threshold + * is distinct from zero first; it's possible for this to be called a few + * times before we've finished reading the config. + */ + if (n_socks >= get_options()->ConnLimit_high_thresh && + get_options()->ConnLimit_high_thresh != 0 && + get_options()->ConnLimit_ != 0) { + /* Try to get down to the low threshold */ + target_n_socks = get_options()->ConnLimit_low_thresh; + log_notice(LD_NET, + "Current number of sockets %d is greater than configured " + "limit %d; OOS handler trying to get down to %d", + n_socks, get_options()->ConnLimit_high_thresh, + target_n_socks); + } else if (failed) { + /* + * If we're not at the limit but we hit a socket exhaustion error, try to + * drop some (but not as aggressively as ConnLimit_low_threshold, which is + * 3/4 of ConnLimit_) + */ + target_n_socks = (n_socks * 9) / 10; + log_notice(LD_NET, + "We saw socket exhaustion at %d open sockets; OOS handler " + "trying to get down to %d", + n_socks, target_n_socks); + } + + if (target_n_socks > 0) { + /* + * It's an OOS! + * + * Count moribund sockets; it's be important that anything we decide + * to get rid of here but don't immediately close get counted as moribund + * on subsequent invocations so we don't try to kill too many things if + * connection_check_oos() gets called multiple times. + */ + moribund_socks = connection_count_moribund(); + + if (moribund_socks < n_socks - target_n_socks) { + socks_to_kill = n_socks - target_n_socks - moribund_socks; + + conns = pick_oos_victims(socks_to_kill); + if (conns) { + kill_conn_list_for_oos(conns); + log_notice(LD_NET, + "OOS handler killed %d conns", smartlist_len(conns)); + smartlist_free(conns); + } else { + log_notice(LD_NET, "OOS handler failed to pick any victim conns"); + } + } else { + log_notice(LD_NET, + "Not killing any sockets for OOS because there are %d " + "already moribund, and we only want to eliminate %d", + moribund_socks, n_socks - target_n_socks); + } + } +} + /** Log how many bytes are used by buffers of different kinds and sizes. */ void connection_dump_buffer_mem_stats(int severity) diff --git a/src/or/connection.h b/src/or/connection.h index f8e0f73246..d25e002fa4 100644 --- a/src/or/connection.h +++ b/src/or/connection.h @@ -34,8 +34,8 @@ void connection_about_to_close_connection(connection_t *conn); void connection_close_immediate(connection_t *conn); void connection_mark_for_close_(connection_t *conn, int line, const char *file); -void connection_mark_for_close_internal_(connection_t *conn, - int line, const char *file); +MOCK_DECL(void, connection_mark_for_close_internal_, + (connection_t *conn, int line, const char *file)); #define connection_mark_for_close(c) \ connection_mark_for_close_((c), __LINE__, SHORT_FILE__) @@ -247,6 +247,22 @@ void clock_skew_warning(const connection_t *conn, long apparent_skew, int trusted, log_domain_mask_t domain, const char *received, const char *source); +/** Check if a connection is on the way out so the OOS handler doesn't try + * to kill more than it needs. */ +static inline int +connection_is_moribund(connection_t *conn) +{ + if (conn != NULL && + (conn->conn_array_index < 0 || + conn->marked_for_close)) { + return 1; + } else { + return 0; + } +} + +void connection_check_oos(int n_socks, int failed); + #ifdef CONNECTION_PRIVATE STATIC void connection_free_(connection_t *conn); @@ -265,6 +281,9 @@ MOCK_DECL(STATIC int,connection_connect_sockaddr, const struct sockaddr *bindaddr, socklen_t bindaddr_len, int *socket_error)); +MOCK_DECL(STATIC void, kill_conn_list_for_oos, (smartlist_t *conns)); +MOCK_DECL(STATIC smartlist_t *, pick_oos_victims, (int n)); + #endif #endif diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 1f0c4bdef5..72d8e13e90 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -394,8 +394,8 @@ connection_or_change_state(or_connection_t *conn, uint8_t state) * be an or_connection_t field, but it got moved to channel_t and we * shouldn't maintain two copies. */ -int -connection_or_get_num_circuits(or_connection_t *conn) +MOCK_IMPL(int, +connection_or_get_num_circuits, (or_connection_t *conn)) { tor_assert(conn); diff --git a/src/or/connection_or.h b/src/or/connection_or.h index e2ec47a4f2..2e8c6066cc 100644 --- a/src/or/connection_or.h +++ b/src/or/connection_or.h @@ -64,7 +64,7 @@ void connection_or_init_conn_from_address(or_connection_t *conn, int connection_or_client_learned_peer_id(or_connection_t *conn, const uint8_t *peer_id); time_t connection_or_client_used(or_connection_t *conn); -int connection_or_get_num_circuits(or_connection_t *conn); +MOCK_DECL(int, connection_or_get_num_circuits, (or_connection_t *conn)); void or_handshake_state_free(or_handshake_state_t *state); void or_handshake_state_record_cell(or_connection_t *conn, or_handshake_state_t *state, diff --git a/src/or/main.c b/src/or/main.c index 4dbd9a005b..03c2b7ed58 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -381,8 +381,8 @@ connection_in_array(connection_t *conn) /** Set <b>*array</b> to an array of all connections. <b>*array</b> must not * be modified. */ -smartlist_t * -get_connection_array(void) +MOCK_IMPL(smartlist_t *, +get_connection_array, (void)) { if (!connection_array) connection_array = smartlist_new(); @@ -651,6 +651,23 @@ close_closeable_connections(void) } } +/** Count moribund connections for the OOS handler */ +MOCK_IMPL(int, +connection_count_moribund, (void)) +{ + int moribund = 0; + + /* + * Count things we'll try to kill when close_closeable_connections() + * runs next. + */ + SMARTLIST_FOREACH_BEGIN(closeable_connection_lst, connection_t *, conn) { + if (SOCKET_OK(conn->s) && connection_is_moribund(conn)) ++moribund; + } SMARTLIST_FOREACH_END(conn); + + return moribund; +} + /** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has * some data to read. */ static void diff --git a/src/or/main.h b/src/or/main.h index 31a22de424..0220ae3c57 100644 --- a/src/or/main.h +++ b/src/or/main.h @@ -25,7 +25,7 @@ int connection_in_array(connection_t *conn); void add_connection_to_closeable_list(connection_t *conn); int connection_is_on_closeable_list(connection_t *conn); -smartlist_t *get_connection_array(void); +MOCK_DECL(smartlist_t *, get_connection_array, (void)); MOCK_DECL(uint64_t,get_bytes_read,(void)); MOCK_DECL(uint64_t,get_bytes_written,(void)); @@ -47,6 +47,8 @@ MOCK_DECL(void,connection_start_writing,(connection_t *conn)); void connection_stop_reading_from_linked_conn(connection_t *conn); +MOCK_DECL(int, connection_count_moribund, (void)); + void directory_all_unreachable(time_t now); void directory_info_has_arrived(time_t now, int from_cache, int suppress_logs); diff --git a/src/or/or.h b/src/or/or.h index 43b31c0fdd..be58fa0d0a 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3699,6 +3699,10 @@ typedef struct { int ConnLimit; /**< Demanded minimum number of simultaneous connections. */ int ConnLimit_; /**< Maximum allowed number of simultaneous connections. */ + int ConnLimit_high_thresh; /**< start trying to lower socket usage if we + * have this many. */ + int ConnLimit_low_thresh; /**< try to get down to here after socket + * exhaustion. */ int RunAsDaemon; /**< If true, run in the background. (Unix only) */ int FascistFirewall; /**< Whether to prefer ORs reachable on open ports. */ smartlist_t *FirewallPorts; /**< Which ports our firewall allows @@ -4454,6 +4458,9 @@ typedef struct { * participate in the protocol. If on (default), a flag is added to the * vote indicating participation. */ int AuthDirSharedRandomness; + + /** If 1, we skip all OOS checks. */ + int DisableOOSCheck; } or_options_t; /** Persistent state for an onion router, as saved to disk. */ |