aboutsummaryrefslogtreecommitdiff
path: root/src/or
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2016-08-25 14:29:06 -0400
committerNick Mathewson <nickm@torproject.org>2016-08-25 14:29:06 -0400
commit1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2 (patch)
treeb53afb27b4ab30c2913ac5e4fba69e8f319fdbc7 /src/or
parent20136a8207f0cda0d23093a884caae0358f98bac (diff)
parent9a09513c0bd54abae3317be4136cf2578e11fbf3 (diff)
downloadtor-1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2.tar.gz
tor-1dfa2213a424c36d4aa70fdab60b27b6b5e1ced2.zip
Merge remote-tracking branch 'andrea/ticket18640_v3'
Diffstat (limited to 'src/or')
-rw-r--r--src/or/config.c30
-rw-r--r--src/or/connection.c304
-rw-r--r--src/or/connection.h23
-rw-r--r--src/or/connection_or.c4
-rw-r--r--src/or/connection_or.h2
-rw-r--r--src/or/main.c21
-rw-r--r--src/or/main.h4
-rw-r--r--src/or/or.h7
8 files changed, 383 insertions, 12 deletions
diff --git a/src/or/config.c b/src/or/config.c
index 541025de16..31bf81877d 100644
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -211,6 +211,7 @@ static config_var_t option_vars_[] = {
V(CountPrivateBandwidth, BOOL, "0"),
V(DataDirectory, FILENAME, NULL),
V(DataDirectoryGroupReadable, BOOL, "0"),
+ V(DisableOOSCheck, BOOL, "1"),
V(DisableNetwork, BOOL, "0"),
V(DirAllowPrivateAddresses, BOOL, "0"),
V(TestingAuthDirTimeToLearnReachability, INTERVAL, "30 minutes"),
@@ -1374,6 +1375,35 @@ options_act_reversible(const or_options_t *old_options, char **msg)
connection_mark_for_close(conn);
}
});
+
+ if (set_conn_limit) {
+ /*
+ * If we adjusted the conn limit, recompute the OOS threshold too
+ *
+ * How many possible sockets to keep in reserve? If we have lots of
+ * possible sockets, keep this below a limit and set ConnLimit_high_thresh
+ * very close to ConnLimit_, but if ConnLimit_ is low, shrink it in
+ * proportion.
+ *
+ * Somewhat arbitrarily, set socks_in_reserve to 5% of ConnLimit_, but
+ * cap it at 64.
+ */
+ int socks_in_reserve = options->ConnLimit_ / 20;
+ if (socks_in_reserve > 64) socks_in_reserve = 64;
+
+ options->ConnLimit_high_thresh = options->ConnLimit_ - socks_in_reserve;
+ options->ConnLimit_low_thresh = (options->ConnLimit_ / 4) * 3;
+ log_info(LD_GENERAL,
+ "Recomputed OOS thresholds: ConnLimit %d, ConnLimit_ %d, "
+ "ConnLimit_high_thresh %d, ConnLimit_low_thresh %d",
+ options->ConnLimit, options->ConnLimit_,
+ options->ConnLimit_high_thresh,
+ options->ConnLimit_low_thresh);
+
+ /* Give the OOS handler a chance with the new thresholds */
+ connection_check_oos(get_n_open_sockets(), 0);
+ }
+
goto done;
rollback:
diff --git a/src/or/connection.c b/src/or/connection.c
index 68e442df54..9b583f4243 100644
--- a/src/or/connection.c
+++ b/src/or/connection.c
@@ -754,9 +754,9 @@ connection_mark_for_close_(connection_t *conn, int line, const char *file)
* For all other cases, use connection_mark_and_flush() instead, which
* checks for or_connection_t properly, instead. See below.
*/
-void
-connection_mark_for_close_internal_(connection_t *conn,
- int line, const char *file)
+MOCK_IMPL(void,
+connection_mark_for_close_internal_, (connection_t *conn,
+ int line, const char *file))
{
assert_connection_ok(conn,0);
tor_assert(line);
@@ -1090,6 +1090,7 @@ connection_listener_new(const struct sockaddr *listensockaddr,
int start_reading = 0;
static int global_next_session_group = SESSION_GROUP_FIRST_AUTO;
tor_addr_t addr;
+ int exhaustion = 0;
if (listensockaddr->sa_family == AF_INET ||
listensockaddr->sa_family == AF_INET6) {
@@ -1108,6 +1109,11 @@ connection_listener_new(const struct sockaddr *listensockaddr,
int e = tor_socket_errno(s);
if (ERRNO_IS_RESOURCE_LIMIT(e)) {
warn_too_many_conns();
+ /*
+ * We'll call the OOS handler at the error exit, so set the
+ * exhaustion flag for it.
+ */
+ exhaustion = 1;
} else {
log_warn(LD_NET, "Socket creation failed: %s",
tor_socket_strerror(e));
@@ -1226,6 +1232,11 @@ connection_listener_new(const struct sockaddr *listensockaddr,
int e = tor_socket_errno(s);
if (ERRNO_IS_RESOURCE_LIMIT(e)) {
warn_too_many_conns();
+ /*
+ * We'll call the OOS handler at the error exit, so set the
+ * exhaustion flag for it.
+ */
+ exhaustion = 1;
} else {
log_warn(LD_NET,"Socket creation failed: %s.", strerror(e));
}
@@ -1344,6 +1355,12 @@ connection_listener_new(const struct sockaddr *listensockaddr,
dnsserv_configure_listener(conn);
}
+ /*
+ * Normal exit; call the OOS handler since connection count just changed;
+ * the exhaustion flag will always be zero here though.
+ */
+ connection_check_oos(get_n_open_sockets(), 0);
+
return conn;
err:
@@ -1352,6 +1369,9 @@ connection_listener_new(const struct sockaddr *listensockaddr,
if (conn)
connection_free(conn);
+ /* Call the OOS handler, indicate if we saw an exhaustion-related error */
+ connection_check_oos(get_n_open_sockets(), exhaustion);
+
return NULL;
}
@@ -1442,21 +1462,34 @@ connection_handle_listener_read(connection_t *conn, int new_type)
if (!SOCKET_OK(news)) { /* accept() error */
int e = tor_socket_errno(conn->s);
if (ERRNO_IS_ACCEPT_EAGAIN(e)) {
- return 0; /* they hung up before we could accept(). that's fine. */
+ /*
+ * they hung up before we could accept(). that's fine.
+ *
+ * give the OOS handler a chance to run though
+ */
+ connection_check_oos(get_n_open_sockets(), 0);
+ return 0;
} else if (ERRNO_IS_RESOURCE_LIMIT(e)) {
warn_too_many_conns();
+ /* Exhaustion; tell the OOS handler */
+ connection_check_oos(get_n_open_sockets(), 1);
return 0;
}
/* else there was a real error. */
log_warn(LD_NET,"accept() failed: %s. Closing listener.",
tor_socket_strerror(e));
connection_mark_for_close(conn);
+ /* Tell the OOS handler about this too */
+ connection_check_oos(get_n_open_sockets(), 0);
return -1;
}
log_debug(LD_NET,
"Connection accepted on socket %d (child of fd %d).",
(int)news,(int)conn->s);
+ /* We accepted a new conn; run OOS handler */
+ connection_check_oos(get_n_open_sockets(), 0);
+
if (make_socket_reuseable(news) < 0) {
if (tor_socket_errno(news) == EINVAL) {
/* This can happen on OSX if we get a badly timed shutdown. */
@@ -1661,12 +1694,18 @@ connection_connect_sockaddr,(connection_t *conn,
s = tor_open_socket_nonblocking(protocol_family, SOCK_STREAM, proto);
if (! SOCKET_OK(s)) {
+ /*
+ * Early OOS handler calls; it matters if it's an exhaustion-related
+ * error or not.
+ */
*socket_error = tor_socket_errno(s);
if (ERRNO_IS_RESOURCE_LIMIT(*socket_error)) {
warn_too_many_conns();
+ connection_check_oos(get_n_open_sockets(), 1);
} else {
log_warn(LD_NET,"Error creating network socket: %s",
tor_socket_strerror(*socket_error));
+ connection_check_oos(get_n_open_sockets(), 0);
}
return -1;
}
@@ -1676,6 +1715,13 @@ connection_connect_sockaddr,(connection_t *conn,
tor_socket_strerror(errno));
}
+ /*
+ * We've got the socket open; give the OOS handler a chance to check
+ * against configuured maximum socket number, but tell it no exhaustion
+ * failure.
+ */
+ connection_check_oos(get_n_open_sockets(), 0);
+
if (bindaddr && bind(s, bindaddr, bindaddr_len) < 0) {
*socket_error = tor_socket_errno(s);
log_warn(LD_NET,"Error binding network socket: %s",
@@ -4454,6 +4500,256 @@ connection_reached_eof(connection_t *conn)
}
}
+/** Comparator for the two-orconn case in OOS victim sort */
+static int
+oos_victim_comparator_for_orconns(or_connection_t *a, or_connection_t *b)
+{
+ int a_circs, b_circs;
+ /* Fewer circuits == higher priority for OOS kill, sort earlier */
+
+ a_circs = connection_or_get_num_circuits(a);
+ b_circs = connection_or_get_num_circuits(b);
+
+ if (a_circs < b_circs) return -1;
+ else if (b_circs > a_circs) return 1;
+ else return 0;
+}
+
+/** Sort comparator for OOS victims; better targets sort before worse
+ * ones. */
+static int
+oos_victim_comparator(const void **a_v, const void **b_v)
+{
+ connection_t *a = NULL, *b = NULL;
+
+ /* Get connection pointers out */
+
+ a = (connection_t *)(*a_v);
+ b = (connection_t *)(*b_v);
+
+ tor_assert(a != NULL);
+ tor_assert(b != NULL);
+
+ /*
+ * We always prefer orconns as victims currently; we won't even see
+ * these non-orconn cases, but if we do, sort them after orconns.
+ */
+ if (a->type == CONN_TYPE_OR && b->type == CONN_TYPE_OR) {
+ return oos_victim_comparator_for_orconns(TO_OR_CONN(a), TO_OR_CONN(b));
+ } else {
+ /*
+ * One isn't an orconn; if one is, it goes first. We currently have no
+ * opinions about cases where neither is an orconn.
+ */
+ if (a->type == CONN_TYPE_OR) return -1;
+ else if (b->type == CONN_TYPE_OR) return 1;
+ else return 0;
+ }
+}
+
+/** Pick n victim connections for the OOS handler and return them in a
+ * smartlist.
+ */
+MOCK_IMPL(STATIC smartlist_t *,
+pick_oos_victims, (int n))
+{
+ smartlist_t *eligible = NULL, *victims = NULL;
+ smartlist_t *conns;
+ int conn_counts_by_type[CONN_TYPE_MAX_ + 1], i;
+
+ /*
+ * Big damn assumption (someone improve this someday!):
+ *
+ * Socket exhaustion normally happens on high-volume relays, and so
+ * most of the connections involved are orconns. We should pick victims
+ * by assembling a list of all orconns, and sorting them in order of
+ * how much 'damage' by some metric we'd be doing by dropping them.
+ *
+ * If we move on from orconns, we should probably think about incoming
+ * directory connections next, or exit connections. Things we should
+ * probably never kill are controller connections and listeners.
+ *
+ * This function will count how many connections of different types
+ * exist and log it for purposes of gathering data on typical OOS
+ * situations to guide future improvements.
+ */
+
+ /* First, get the connection array */
+ conns = get_connection_array();
+ /*
+ * Iterate it and pick out eligible connection types, and log some stats
+ * along the way.
+ */
+ eligible = smartlist_new();
+ memset(conn_counts_by_type, 0, sizeof(conn_counts_by_type));
+ SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) {
+ /* Bump the counter */
+ tor_assert(c->type <= CONN_TYPE_MAX_);
+ ++(conn_counts_by_type[c->type]);
+
+ /* Skip anything without a socket we can free */
+ if (!(SOCKET_OK(c->s))) {
+ continue;
+ }
+
+ /* Skip anything we would count as moribund */
+ if (connection_is_moribund(c)) {
+ continue;
+ }
+
+ switch (c->type) {
+ case CONN_TYPE_OR:
+ /* We've got an orconn, it's eligible to be OOSed */
+ smartlist_add(eligible, c);
+ break;
+ default:
+ /* We don't know what to do with it, ignore it */
+ break;
+ }
+ } SMARTLIST_FOREACH_END(c);
+
+ /* Log some stats */
+ if (smartlist_len(conns) > 0) {
+ /* At least one counter must be non-zero */
+ log_info(LD_NET, "Some stats on conn types seen during OOS follow");
+ for (i = CONN_TYPE_MIN_; i <= CONN_TYPE_MAX_; ++i) {
+ /* Did we see any? */
+ if (conn_counts_by_type[i] > 0) {
+ log_info(LD_NET, "%s: %d conns",
+ conn_type_to_string(i),
+ conn_counts_by_type[i]);
+ }
+ }
+ log_info(LD_NET, "Done with OOS conn type stats");
+ }
+
+ /* Did we find more eligible targets than we want to kill? */
+ if (smartlist_len(eligible) > n) {
+ /* Sort the list in order of target preference */
+ smartlist_sort(eligible, oos_victim_comparator);
+ /* Pick first n as victims */
+ victims = smartlist_new();
+ for (i = 0; i < n; ++i) {
+ smartlist_add(victims, smartlist_get(eligible, i));
+ }
+ /* Free the original list */
+ smartlist_free(eligible);
+ } else {
+ /* No, we can just call them all victims */
+ victims = eligible;
+ }
+
+ return victims;
+}
+
+/** Kill a list of connections for the OOS handler. */
+MOCK_IMPL(STATIC void,
+kill_conn_list_for_oos, (smartlist_t *conns))
+{
+ if (!conns) return;
+
+ SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) {
+ /* Make sure the channel layer gets told about orconns */
+ if (c->type == CONN_TYPE_OR) {
+ connection_or_close_for_error(TO_OR_CONN(c), 1);
+ } else {
+ connection_mark_for_close(c);
+ }
+ } SMARTLIST_FOREACH_END(c);
+
+ log_notice(LD_NET,
+ "OOS handler marked %d connections",
+ smartlist_len(conns));
+}
+
+/** Out-of-Sockets handler; n_socks is the current number of open
+ * sockets, and failed is non-zero if a socket exhaustion related
+ * error immediately preceded this call. This is where to do
+ * circuit-killing heuristics as needed.
+ */
+void
+connection_check_oos(int n_socks, int failed)
+{
+ int target_n_socks = 0, moribund_socks, socks_to_kill;
+ smartlist_t *conns;
+
+ /* Early exit: is OOS checking disabled? */
+ if (get_options()->DisableOOSCheck) {
+ return;
+ }
+
+ /* Sanity-check args */
+ tor_assert(n_socks >= 0);
+
+ /*
+ * Make some log noise; keep it at debug level since this gets a chance
+ * to run on every connection attempt.
+ */
+ log_debug(LD_NET,
+ "Running the OOS handler (%d open sockets, %s)",
+ n_socks, (failed != 0) ? "exhaustion seen" : "no exhaustion");
+
+ /*
+ * Check if we're really handling an OOS condition, and if so decide how
+ * many sockets we want to get down to. Be sure we check if the threshold
+ * is distinct from zero first; it's possible for this to be called a few
+ * times before we've finished reading the config.
+ */
+ if (n_socks >= get_options()->ConnLimit_high_thresh &&
+ get_options()->ConnLimit_high_thresh != 0 &&
+ get_options()->ConnLimit_ != 0) {
+ /* Try to get down to the low threshold */
+ target_n_socks = get_options()->ConnLimit_low_thresh;
+ log_notice(LD_NET,
+ "Current number of sockets %d is greater than configured "
+ "limit %d; OOS handler trying to get down to %d",
+ n_socks, get_options()->ConnLimit_high_thresh,
+ target_n_socks);
+ } else if (failed) {
+ /*
+ * If we're not at the limit but we hit a socket exhaustion error, try to
+ * drop some (but not as aggressively as ConnLimit_low_threshold, which is
+ * 3/4 of ConnLimit_)
+ */
+ target_n_socks = (n_socks * 9) / 10;
+ log_notice(LD_NET,
+ "We saw socket exhaustion at %d open sockets; OOS handler "
+ "trying to get down to %d",
+ n_socks, target_n_socks);
+ }
+
+ if (target_n_socks > 0) {
+ /*
+ * It's an OOS!
+ *
+ * Count moribund sockets; it's be important that anything we decide
+ * to get rid of here but don't immediately close get counted as moribund
+ * on subsequent invocations so we don't try to kill too many things if
+ * connection_check_oos() gets called multiple times.
+ */
+ moribund_socks = connection_count_moribund();
+
+ if (moribund_socks < n_socks - target_n_socks) {
+ socks_to_kill = n_socks - target_n_socks - moribund_socks;
+
+ conns = pick_oos_victims(socks_to_kill);
+ if (conns) {
+ kill_conn_list_for_oos(conns);
+ log_notice(LD_NET,
+ "OOS handler killed %d conns", smartlist_len(conns));
+ smartlist_free(conns);
+ } else {
+ log_notice(LD_NET, "OOS handler failed to pick any victim conns");
+ }
+ } else {
+ log_notice(LD_NET,
+ "Not killing any sockets for OOS because there are %d "
+ "already moribund, and we only want to eliminate %d",
+ moribund_socks, n_socks - target_n_socks);
+ }
+ }
+}
+
/** Log how many bytes are used by buffers of different kinds and sizes. */
void
connection_dump_buffer_mem_stats(int severity)
diff --git a/src/or/connection.h b/src/or/connection.h
index f8e0f73246..d25e002fa4 100644
--- a/src/or/connection.h
+++ b/src/or/connection.h
@@ -34,8 +34,8 @@ void connection_about_to_close_connection(connection_t *conn);
void connection_close_immediate(connection_t *conn);
void connection_mark_for_close_(connection_t *conn,
int line, const char *file);
-void connection_mark_for_close_internal_(connection_t *conn,
- int line, const char *file);
+MOCK_DECL(void, connection_mark_for_close_internal_,
+ (connection_t *conn, int line, const char *file));
#define connection_mark_for_close(c) \
connection_mark_for_close_((c), __LINE__, SHORT_FILE__)
@@ -247,6 +247,22 @@ void clock_skew_warning(const connection_t *conn, long apparent_skew,
int trusted, log_domain_mask_t domain,
const char *received, const char *source);
+/** Check if a connection is on the way out so the OOS handler doesn't try
+ * to kill more than it needs. */
+static inline int
+connection_is_moribund(connection_t *conn)
+{
+ if (conn != NULL &&
+ (conn->conn_array_index < 0 ||
+ conn->marked_for_close)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+void connection_check_oos(int n_socks, int failed);
+
#ifdef CONNECTION_PRIVATE
STATIC void connection_free_(connection_t *conn);
@@ -265,6 +281,9 @@ MOCK_DECL(STATIC int,connection_connect_sockaddr,
const struct sockaddr *bindaddr,
socklen_t bindaddr_len,
int *socket_error));
+MOCK_DECL(STATIC void, kill_conn_list_for_oos, (smartlist_t *conns));
+MOCK_DECL(STATIC smartlist_t *, pick_oos_victims, (int n));
+
#endif
#endif
diff --git a/src/or/connection_or.c b/src/or/connection_or.c
index 1f0c4bdef5..72d8e13e90 100644
--- a/src/or/connection_or.c
+++ b/src/or/connection_or.c
@@ -394,8 +394,8 @@ connection_or_change_state(or_connection_t *conn, uint8_t state)
* be an or_connection_t field, but it got moved to channel_t and we
* shouldn't maintain two copies. */
-int
-connection_or_get_num_circuits(or_connection_t *conn)
+MOCK_IMPL(int,
+connection_or_get_num_circuits, (or_connection_t *conn))
{
tor_assert(conn);
diff --git a/src/or/connection_or.h b/src/or/connection_or.h
index e2ec47a4f2..2e8c6066cc 100644
--- a/src/or/connection_or.h
+++ b/src/or/connection_or.h
@@ -64,7 +64,7 @@ void connection_or_init_conn_from_address(or_connection_t *conn,
int connection_or_client_learned_peer_id(or_connection_t *conn,
const uint8_t *peer_id);
time_t connection_or_client_used(or_connection_t *conn);
-int connection_or_get_num_circuits(or_connection_t *conn);
+MOCK_DECL(int, connection_or_get_num_circuits, (or_connection_t *conn));
void or_handshake_state_free(or_handshake_state_t *state);
void or_handshake_state_record_cell(or_connection_t *conn,
or_handshake_state_t *state,
diff --git a/src/or/main.c b/src/or/main.c
index 4dbd9a005b..03c2b7ed58 100644
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -381,8 +381,8 @@ connection_in_array(connection_t *conn)
/** Set <b>*array</b> to an array of all connections. <b>*array</b> must not
* be modified.
*/
-smartlist_t *
-get_connection_array(void)
+MOCK_IMPL(smartlist_t *,
+get_connection_array, (void))
{
if (!connection_array)
connection_array = smartlist_new();
@@ -651,6 +651,23 @@ close_closeable_connections(void)
}
}
+/** Count moribund connections for the OOS handler */
+MOCK_IMPL(int,
+connection_count_moribund, (void))
+{
+ int moribund = 0;
+
+ /*
+ * Count things we'll try to kill when close_closeable_connections()
+ * runs next.
+ */
+ SMARTLIST_FOREACH_BEGIN(closeable_connection_lst, connection_t *, conn) {
+ if (SOCKET_OK(conn->s) && connection_is_moribund(conn)) ++moribund;
+ } SMARTLIST_FOREACH_END(conn);
+
+ return moribund;
+}
+
/** Libevent callback: this gets invoked when (connection_t*)<b>conn</b> has
* some data to read. */
static void
diff --git a/src/or/main.h b/src/or/main.h
index 31a22de424..0220ae3c57 100644
--- a/src/or/main.h
+++ b/src/or/main.h
@@ -25,7 +25,7 @@ int connection_in_array(connection_t *conn);
void add_connection_to_closeable_list(connection_t *conn);
int connection_is_on_closeable_list(connection_t *conn);
-smartlist_t *get_connection_array(void);
+MOCK_DECL(smartlist_t *, get_connection_array, (void));
MOCK_DECL(uint64_t,get_bytes_read,(void));
MOCK_DECL(uint64_t,get_bytes_written,(void));
@@ -47,6 +47,8 @@ MOCK_DECL(void,connection_start_writing,(connection_t *conn));
void connection_stop_reading_from_linked_conn(connection_t *conn);
+MOCK_DECL(int, connection_count_moribund, (void));
+
void directory_all_unreachable(time_t now);
void directory_info_has_arrived(time_t now, int from_cache, int suppress_logs);
diff --git a/src/or/or.h b/src/or/or.h
index 43b31c0fdd..be58fa0d0a 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3699,6 +3699,10 @@ typedef struct {
int ConnLimit; /**< Demanded minimum number of simultaneous connections. */
int ConnLimit_; /**< Maximum allowed number of simultaneous connections. */
+ int ConnLimit_high_thresh; /**< start trying to lower socket usage if we
+ * have this many. */
+ int ConnLimit_low_thresh; /**< try to get down to here after socket
+ * exhaustion. */
int RunAsDaemon; /**< If true, run in the background. (Unix only) */
int FascistFirewall; /**< Whether to prefer ORs reachable on open ports. */
smartlist_t *FirewallPorts; /**< Which ports our firewall allows
@@ -4454,6 +4458,9 @@ typedef struct {
* participate in the protocol. If on (default), a flag is added to the
* vote indicating participation. */
int AuthDirSharedRandomness;
+
+ /** If 1, we skip all OOS checks. */
+ int DisableOOSCheck;
} or_options_t;
/** Persistent state for an onion router, as saved to disk. */