diff options
Diffstat (limited to 'src/or/connection.c')
-rw-r--r-- | src/or/connection.c | 886 |
1 files changed, 434 insertions, 452 deletions
diff --git a/src/or/connection.c b/src/or/connection.c index 4fbbaf1abd..188276026f 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -8,10 +8,55 @@ * \file connection.c * \brief General high-level functions to handle reading and writing * on connections. + * + * Each connection (ideally) represents a TLS connection, a TCP socket, a unix + * socket, or a UDP socket on which reads and writes can occur. (But see + * connection_edge.c for cases where connections can also represent streams + * that do not have a corresponding socket.) + * + * The module implements the abstract type, connection_t. The subtypes are: + * <ul> + * <li>listener_connection_t, implemented here in connection.c + * <li>dir_connection_t, implemented in directory.c + * <li>or_connection_t, implemented in connection_or.c + * <li>edge_connection_t, implemented in connection_edge.c, along with + * its subtype(s): + * <ul><li>entry_connection_t, also implemented in connection_edge.c + * </ul> + * <li>control_connection_t, implemented in control.c + * </ul> + * + * The base type implemented in this module is responsible for basic + * rate limiting, flow control, and marshalling bytes onto and off of the + * network (either directly or via TLS). + * + * Connections are registered with the main loop with connection_add(). As + * they become able to read or write register the fact with the event main + * loop by calling connection_watch_events(), connection_start_reading(), or + * connection_start_writing(). When they no longer want to read or write, + * they call connection_stop_reading() or connection_stop_writing(). + * + * To queue data to be written on a connection, call + * connection_write_to_buf(). When data arrives, the + * connection_process_inbuf() callback is invoked, which dispatches to a + * type-specific function (such as connection_edge_process_inbuf() for + * example). Connection types that need notice of when data has been written + * receive notification via connection_flushed_some() and + * connection_finished_flushing(). These functions all delegate to + * type-specific implementations. + * + * Additionally, beyond the core of connection_t, this module also implements: + * <ul> + * <li>Listeners, which wait for incoming sockets and launch connections + * <li>Outgoing SOCKS proxy support + * <li>Outgoing HTTP proxy support + * <li>An out-of-sockets handler for dealing with socket exhaustion + * </ul> **/ #define CONNECTION_PRIVATE #include "or.h" +#include "bridges.h" #include "buffers.h" /* * Define this so we get channel internal functions, since we're implementing @@ -38,6 +83,7 @@ #include "ext_orport.h" #include "geoip.h" #include "main.h" +#include "hs_common.h" #include "nodelist.h" #include "policies.h" #include "reasons.h" @@ -52,10 +98,6 @@ #include "sandbox.h" #include "transports.h" -#ifdef USE_BUFFEREVENTS -#include <event2/event.h> -#endif - #ifdef HAVE_PWD_H #include <pwd.h> #endif @@ -75,10 +117,8 @@ static void connection_init(time_t now, connection_t *conn, int type, static int connection_init_accepted_conn(connection_t *conn, const listener_connection_t *listener); static int connection_handle_listener_read(connection_t *conn, int new_type); -#ifndef USE_BUFFEREVENTS static int connection_bucket_should_increase(int bucket, or_connection_t *conn); -#endif static int connection_finished_flushing(connection_t *conn); static int connection_flushed_some(connection_t *conn); static int connection_finished_connecting(connection_t *conn); @@ -94,11 +134,13 @@ static int connection_read_https_proxy_response(connection_t *conn); static void connection_send_socks5_connect(connection_t *conn); static const char *proxy_type_to_string(int proxy_type); static int get_proxy_type(void); +const tor_addr_t *conn_get_outbound_address(sa_family_t family, + const or_options_t *options, unsigned int conn_type); /** The last addresses that our network interface seemed to have been * binding to. We use this as one way to detect when our IP changes. * - * XXX024 We should really use the entire list of interfaces here. + * XXXX+ We should really use the entire list of interfaces here. **/ static tor_addr_t *last_interface_ipv4 = NULL; /* DOCDOC last_interface_ipv6 */ @@ -236,26 +278,6 @@ conn_state_to_string(int type, int state) return buf; } -#ifdef USE_BUFFEREVENTS -/** Return true iff the connection's type is one that can use a - bufferevent-based implementation. */ -int -connection_type_uses_bufferevent(connection_t *conn) -{ - switch (conn->type) { - case CONN_TYPE_AP: - case CONN_TYPE_EXIT: - case CONN_TYPE_DIR: - case CONN_TYPE_CONTROL: - case CONN_TYPE_OR: - case CONN_TYPE_EXT_OR: - return 1; - default: - return 0; - } -} -#endif - /** Allocate and return a new dir_connection_t, initialized as by * connection_init(). */ dir_connection_t * @@ -427,13 +449,11 @@ connection_init(time_t now, connection_t *conn, int type, int socket_family) conn->type = type; conn->socket_family = socket_family; -#ifndef USE_BUFFEREVENTS if (!connection_is_listener(conn)) { /* listeners never use their buf */ conn->inbuf = buf_new(); conn->outbuf = buf_new(); } -#endif conn->timestamp_created = now; conn->timestamp_lastread = now; @@ -577,10 +597,10 @@ connection_free_(connection_t *conn) if (entry_conn->socks_request) socks_request_free(entry_conn->socks_request); if (entry_conn->pending_optimistic_data) { - generic_buffer_free(entry_conn->pending_optimistic_data); + buf_free(entry_conn->pending_optimistic_data); } if (entry_conn->sending_optimistic_data) { - generic_buffer_free(entry_conn->sending_optimistic_data); + buf_free(entry_conn->sending_optimistic_data); } } if (CONN_IS_EDGE(conn)) { @@ -603,15 +623,6 @@ connection_free_(connection_t *conn) tor_event_free(conn->read_event); tor_event_free(conn->write_event); conn->read_event = conn->write_event = NULL; - IF_HAS_BUFFEREVENT(conn, { - /* This was a workaround to handle bugs in some old versions of libevent - * where callbacks can occur after calling bufferevent_free(). Setting - * the callbacks to NULL prevented this. It shouldn't be necessary any - * more, but let's not tempt fate for now. */ - bufferevent_setcb(conn->bufev, NULL, NULL, NULL, NULL); - bufferevent_free(conn->bufev); - conn->bufev = NULL; - }); if (conn->type == CONN_TYPE_DIR) { dir_connection_t *dir_conn = TO_DIR_CONN(conn); @@ -625,6 +636,11 @@ connection_free_(connection_t *conn) cached_dir_decref(dir_conn->cached_dir); rend_data_free(dir_conn->rend_data); + if (dir_conn->guard_state) { + /* Cancel before freeing, if it's still there. */ + entry_guard_cancel(&dir_conn->guard_state); + } + circuit_guard_state_free(dir_conn->guard_state); } if (SOCKET_OK(conn->s)) { @@ -636,7 +652,7 @@ connection_free_(connection_t *conn) if (conn->type == CONN_TYPE_OR && !tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest)) { log_warn(LD_BUG, "called on OR conn with non-zeroed identity_digest"); - connection_or_remove_from_identity_map(TO_OR_CONN(conn)); + connection_or_clear_identity(TO_OR_CONN(conn)); } if (conn->type == CONN_TYPE_OR || conn->type == CONN_TYPE_EXT_OR) { connection_or_remove_from_ext_or_id_map(TO_OR_CONN(conn)); @@ -645,13 +661,6 @@ connection_free_(connection_t *conn) tor_free(TO_OR_CONN(conn)->ext_or_transport); } -#ifdef USE_BUFFEREVENTS - if (conn->type == CONN_TYPE_OR && TO_OR_CONN(conn)->bucket_cfg) { - ev_token_bucket_cfg_free(TO_OR_CONN(conn)->bucket_cfg); - TO_OR_CONN(conn)->bucket_cfg = NULL; - } -#endif - memwipe(mem, 0xCC, memlen); /* poison memory */ tor_free(mem); } @@ -665,9 +674,7 @@ connection_free,(connection_t *conn)) return; tor_assert(!connection_is_on_closeable_list(conn)); tor_assert(!connection_in_array(conn)); - if (conn->linked_conn) { - log_err(LD_BUG, "Called with conn->linked_conn still set."); - tor_fragile_assert(); + if (BUG(conn->linked_conn)) { conn->linked_conn->linked_conn = NULL; if (! conn->linked_conn->marked_for_close && conn->linked_conn->reading_from_linked_conn) @@ -676,7 +683,7 @@ connection_free,(connection_t *conn)) } if (connection_speaks_cells(conn)) { if (!tor_digest_is_zero(TO_OR_CONN(conn)->identity_digest)) { - connection_or_remove_from_identity_map(TO_OR_CONN(conn)); + connection_or_clear_identity(TO_OR_CONN(conn)); } } if (conn->type == CONN_TYPE_CONTROL) { @@ -800,9 +807,9 @@ connection_mark_for_close_(connection_t *conn, int line, const char *file) * For all other cases, use connection_mark_and_flush() instead, which * checks for or_connection_t properly, instead. See below. */ -void -connection_mark_for_close_internal_(connection_t *conn, - int line, const char *file) +MOCK_IMPL(void, +connection_mark_for_close_internal_, (connection_t *conn, + int line, const char *file)) { assert_connection_ok(conn,0); tor_assert(line); @@ -1136,6 +1143,7 @@ connection_listener_new(const struct sockaddr *listensockaddr, int start_reading = 0; static int global_next_session_group = SESSION_GROUP_FIRST_AUTO; tor_addr_t addr; + int exhaustion = 0; if (listensockaddr->sa_family == AF_INET || listensockaddr->sa_family == AF_INET6) { @@ -1154,6 +1162,11 @@ connection_listener_new(const struct sockaddr *listensockaddr, int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* + * We'll call the OOS handler at the error exit, so set the + * exhaustion flag for it. + */ + exhaustion = 1; } else { log_warn(LD_NET, "Socket creation failed: %s", tor_socket_strerror(e)); @@ -1272,6 +1285,11 @@ connection_listener_new(const struct sockaddr *listensockaddr, int e = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* + * We'll call the OOS handler at the error exit, so set the + * exhaustion flag for it. + */ + exhaustion = 1; } else { log_warn(LD_NET,"Socket creation failed: %s.", strerror(e)); } @@ -1390,6 +1408,12 @@ connection_listener_new(const struct sockaddr *listensockaddr, dnsserv_configure_listener(conn); } + /* + * Normal exit; call the OOS handler since connection count just changed; + * the exhaustion flag will always be zero here though. + */ + connection_check_oos(get_n_open_sockets(), 0); + return conn; err: @@ -1398,6 +1422,9 @@ connection_listener_new(const struct sockaddr *listensockaddr, if (conn) connection_free(conn); + /* Call the OOS handler, indicate if we saw an exhaustion-related error */ + connection_check_oos(get_n_open_sockets(), exhaustion); + return NULL; } @@ -1488,21 +1515,34 @@ connection_handle_listener_read(connection_t *conn, int new_type) if (!SOCKET_OK(news)) { /* accept() error */ int e = tor_socket_errno(conn->s); if (ERRNO_IS_ACCEPT_EAGAIN(e)) { - return 0; /* they hung up before we could accept(). that's fine. */ + /* + * they hung up before we could accept(). that's fine. + * + * give the OOS handler a chance to run though + */ + connection_check_oos(get_n_open_sockets(), 0); + return 0; } else if (ERRNO_IS_RESOURCE_LIMIT(e)) { warn_too_many_conns(); + /* Exhaustion; tell the OOS handler */ + connection_check_oos(get_n_open_sockets(), 1); return 0; } /* else there was a real error. */ log_warn(LD_NET,"accept() failed: %s. Closing listener.", tor_socket_strerror(e)); connection_mark_for_close(conn); + /* Tell the OOS handler about this too */ + connection_check_oos(get_n_open_sockets(), 0); return -1; } log_debug(LD_NET, "Connection accepted on socket %d (child of fd %d).", (int)news,(int)conn->s); + /* We accepted a new conn; run OOS handler */ + connection_check_oos(get_n_open_sockets(), 0); + if (make_socket_reuseable(news) < 0) { if (tor_socket_errno(news) == EINVAL) { /* This can happen on OSX if we get a badly timed shutdown. */ @@ -1563,16 +1603,19 @@ connection_handle_listener_read(connection_t *conn, int new_type) /* remember the remote address */ tor_addr_copy(&newconn->addr, &addr); - newconn->port = port; - newconn->address = tor_dup_addr(&addr); + if (new_type == CONN_TYPE_AP && conn->socket_family == AF_UNIX) { + newconn->port = 0; + newconn->address = tor_strdup(conn->address); + } else { + newconn->port = port; + newconn->address = tor_addr_to_str_dup(&addr); + } if (new_type == CONN_TYPE_AP && conn->socket_family != AF_UNIX) { log_info(LD_NET, "New SOCKS connection opened from %s.", fmt_and_decorate_addr(&addr)); } if (new_type == CONN_TYPE_AP && conn->socket_family == AF_UNIX) { - newconn->port = 0; - newconn->address = tor_strdup(conn->address); log_info(LD_NET, "New SOCKS AF_UNIX connection opened"); } if (new_type == CONN_TYPE_CONTROL) { @@ -1707,12 +1750,18 @@ connection_connect_sockaddr,(connection_t *conn, s = tor_open_socket_nonblocking(protocol_family, SOCK_STREAM, proto); if (! SOCKET_OK(s)) { + /* + * Early OOS handler calls; it matters if it's an exhaustion-related + * error or not. + */ *socket_error = tor_socket_errno(s); if (ERRNO_IS_RESOURCE_LIMIT(*socket_error)) { warn_too_many_conns(); + connection_check_oos(get_n_open_sockets(), 1); } else { log_warn(LD_NET,"Error creating network socket: %s", tor_socket_strerror(*socket_error)); + connection_check_oos(get_n_open_sockets(), 0); } return -1; } @@ -1722,6 +1771,13 @@ connection_connect_sockaddr,(connection_t *conn, tor_socket_strerror(errno)); } + /* + * We've got the socket open; give the OOS handler a chance to check + * against configured maximum socket number, but tell it no exhaustion + * failure. + */ + connection_check_oos(get_n_open_sockets(), 0); + if (bindaddr && bind(s, bindaddr, bindaddr_len) < 0) { *socket_error = tor_socket_errno(s); log_warn(LD_NET,"Error binding network socket: %s", @@ -1836,6 +1892,55 @@ connection_connect_log_client_use_ip_version(const connection_t *conn) } } +/** Retrieve the outbound address depending on the protocol (IPv4 or IPv6) + * and the connection type (relay, exit, ...) + * Return a socket address or NULL in case nothing is configured. + **/ +const tor_addr_t * +conn_get_outbound_address(sa_family_t family, + const or_options_t *options, unsigned int conn_type) +{ + const tor_addr_t *ext_addr = NULL; + + int fam_index; + switch (family) { + case AF_INET: + fam_index = 0; + break; + case AF_INET6: + fam_index = 1; + break; + default: + return NULL; + } + + // If an exit connection, use the exit address (if present) + if (conn_type == CONN_TYPE_EXIT) { + if (!tor_addr_is_null( + &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT][fam_index])) { + ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT] + [fam_index]; + } else if (!tor_addr_is_null( + &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT_AND_OR] + [fam_index])) { + ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT_AND_OR] + [fam_index]; + } + } else { // All non-exit connections + if (!tor_addr_is_null( + &options->OutboundBindAddresses[OUTBOUND_ADDR_OR][fam_index])) { + ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_OR] + [fam_index]; + } else if (!tor_addr_is_null( + &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT_AND_OR] + [fam_index])) { + ext_addr = &options->OutboundBindAddresses[OUTBOUND_ADDR_EXIT_AND_OR] + [fam_index]; + } + } + return ext_addr; +} + /** Take conn, make a nonblocking socket; try to connect to * addr:port (port arrives in *host order*). If fail, return -1 and if * applicable put your best guess about errno into *<b>socket_error</b>. @@ -1857,26 +1962,15 @@ connection_connect(connection_t *conn, const char *address, struct sockaddr *bind_addr = NULL; struct sockaddr *dest_addr; int dest_addr_len, bind_addr_len = 0; - const or_options_t *options = get_options(); - int protocol_family; /* Log if we didn't stick to ClientUseIPv4/6 or ClientPreferIPv6OR/DirPort */ connection_connect_log_client_use_ip_version(conn); - if (tor_addr_family(addr) == AF_INET6) - protocol_family = PF_INET6; - else - protocol_family = PF_INET; - if (!tor_addr_is_loopback(addr)) { const tor_addr_t *ext_addr = NULL; - if (protocol_family == AF_INET && - !tor_addr_is_null(&options->OutboundBindAddressIPv4_)) - ext_addr = &options->OutboundBindAddressIPv4_; - else if (protocol_family == AF_INET6 && - !tor_addr_is_null(&options->OutboundBindAddressIPv6_)) - ext_addr = &options->OutboundBindAddressIPv6_; + ext_addr = conn_get_outbound_address(tor_addr_family(addr), get_options(), + conn->type); if (ext_addr) { memset(&bind_addr_ss, 0, sizeof(bind_addr_ss)); bind_addr_len = tor_addr_to_sockaddr(ext_addr, 0, @@ -2242,7 +2336,7 @@ connection_send_socks5_connect(connection_t *conn) } else { /* AF_INET6 */ buf[3] = 4; reqsize += 16; - memcpy(buf + 4, tor_addr_to_in6(&conn->addr), 16); + memcpy(buf + 4, tor_addr_to_in6_addr8(&conn->addr), 16); memcpy(buf + 20, &port, 2); } @@ -2251,18 +2345,13 @@ connection_send_socks5_connect(connection_t *conn) conn->proxy_state = PROXY_SOCKS5_WANT_CONNECT_OK; } -/** Wrapper around fetch_from_(buf/evbuffer)_socks_client: see those functions +/** Wrapper around fetch_from_buf_socks_client: see that functions * for documentation of its behavior. */ static int connection_fetch_from_buf_socks_client(connection_t *conn, int state, char **reason) { - IF_HAS_BUFFEREVENT(conn, { - struct evbuffer *input = bufferevent_get_input(conn->bufev); - return fetch_from_evbuffer_socks_client(input, state, reason); - }) ELSE_IF_NO_BUFFEREVENT { - return fetch_from_buf_socks_client(conn->inbuf, state, reason); - } + return fetch_from_buf_socks_client(conn->inbuf, state, reason); } /** Call this from connection_*_process_inbuf() to advance the proxy @@ -2538,7 +2627,7 @@ retry_listener_ports(smartlist_t *old_conns, real_port, listensockaddr, sizeof(struct sockaddr_storage)); - address = tor_dup_addr(&port->addr); + address = tor_addr_to_str_dup(&port->addr); } if (listensockaddr) { @@ -2696,23 +2785,15 @@ connection_is_rate_limited(connection_t *conn) return 1; } -#ifdef USE_BUFFEREVENTS -static struct bufferevent_rate_limit_group *global_rate_limit = NULL; -#else -extern int global_read_bucket, global_write_bucket; -extern int global_relayed_read_bucket, global_relayed_write_bucket; - /** Did either global write bucket run dry last second? If so, * we are likely to run dry again this second, so be stingy with the * tokens we just put in. */ static int write_buckets_empty_last_second = 0; -#endif /** How many seconds of no active local circuits will make the * connection revert to the "relayed" bandwidth class? */ #define CLIENT_IDLE_TIME_FOR_PRIORITY 30 -#ifndef USE_BUFFEREVENTS /** Return 1 if <b>conn</b> should use tokens from the "relayed" * bandwidth rates, else 0. Currently, only OR conns with bandwidth * class 1, and directory conns that are serving data out, count. @@ -2824,20 +2905,6 @@ connection_bucket_write_limit(connection_t *conn, time_t now) return connection_bucket_round_robin(base, priority, global_bucket, conn_bucket); } -#else -static ssize_t -connection_bucket_read_limit(connection_t *conn, time_t now) -{ - (void) now; - return bufferevent_get_max_to_read(conn->bufev); -} -ssize_t -connection_bucket_write_limit(connection_t *conn, time_t now) -{ - (void) now; - return bufferevent_get_max_to_write(conn->bufev); -} -#endif /** Return 1 if the global write buckets are low enough that we * shouldn't send <b>attempt</b> bytes of low-priority directory stuff @@ -2861,12 +2928,8 @@ connection_bucket_write_limit(connection_t *conn, time_t now) int global_write_bucket_low(connection_t *conn, size_t attempt, int priority) { -#ifdef USE_BUFFEREVENTS - ssize_t smaller_bucket = bufferevent_get_max_to_write(conn->bufev); -#else int smaller_bucket = global_write_bucket < global_relayed_write_bucket ? global_write_bucket : global_relayed_write_bucket; -#endif if (authdir_mode(get_options()) && priority>1) return 0; /* there's always room to answer v2 if we're an auth dir */ @@ -2876,10 +2939,8 @@ global_write_bucket_low(connection_t *conn, size_t attempt, int priority) if (smaller_bucket < (int)attempt) return 1; /* not enough space no matter the priority */ -#ifndef USE_BUFFEREVENTS if (write_buckets_empty_last_second) return 1; /* we're already hitting our limits, no more please */ -#endif if (priority == 1) { /* old-style v1 query */ /* Could we handle *two* of these requests within the next two seconds? */ @@ -2927,29 +2988,6 @@ record_num_bytes_transferred_impl(connection_t *conn, rep_hist_note_exit_bytes(conn->port, num_written, num_read); } -#ifdef USE_BUFFEREVENTS -/** Wrapper around fetch_from_(buf/evbuffer)_socks_client: see those functions - * for documentation of its behavior. */ -static void -record_num_bytes_transferred(connection_t *conn, - time_t now, size_t num_read, size_t num_written) -{ - /* XXX024 check if this is necessary */ - if (num_written >= INT_MAX || num_read >= INT_MAX) { - log_err(LD_BUG, "Value out of range. num_read=%lu, num_written=%lu, " - "connection type=%s, state=%s", - (unsigned long)num_read, (unsigned long)num_written, - conn_type_to_string(conn->type), - conn_state_to_string(conn->type, conn->state)); - if (num_written >= INT_MAX) num_written = 1; - if (num_read >= INT_MAX) num_read = 1; - tor_fragile_assert(); - } - - record_num_bytes_transferred_impl(conn,now,num_read,num_written); -} -#endif - /** Helper: convert given <b>tvnow</b> time value to milliseconds since * midnight. */ static uint32_t @@ -2994,7 +3032,6 @@ connection_buckets_note_empty_ts(uint32_t *timestamp_var, *timestamp_var = msec_since_midnight(tvnow); } -#ifndef USE_BUFFEREVENTS /** Last time at which the global or relay buckets were emptied in msec * since midnight. */ static uint32_t global_relayed_read_emptied = 0, @@ -3325,92 +3362,6 @@ connection_bucket_should_increase(int bucket, or_connection_t *conn) return 1; } -#else -static void -connection_buckets_decrement(connection_t *conn, time_t now, - size_t num_read, size_t num_written) -{ - (void) conn; - (void) now; - (void) num_read; - (void) num_written; - /* Libevent does this for us. */ -} - -void -connection_bucket_refill(int seconds_elapsed, time_t now) -{ - (void) seconds_elapsed; - (void) now; - /* Libevent does this for us. */ -} -void -connection_bucket_init(void) -{ - const or_options_t *options = get_options(); - const struct timeval *tick = tor_libevent_get_one_tick_timeout(); - struct ev_token_bucket_cfg *bucket_cfg; - - uint64_t rate, burst; - if (options->RelayBandwidthRate) { - rate = options->RelayBandwidthRate; - burst = options->RelayBandwidthBurst; - } else { - rate = options->BandwidthRate; - burst = options->BandwidthBurst; - } - - /* This can't overflow, since TokenBucketRefillInterval <= 1000, - * and rate started out less than INT32_MAX. */ - rate = (rate * options->TokenBucketRefillInterval) / 1000; - - bucket_cfg = ev_token_bucket_cfg_new((uint32_t)rate, (uint32_t)burst, - (uint32_t)rate, (uint32_t)burst, - tick); - - if (!global_rate_limit) { - global_rate_limit = - bufferevent_rate_limit_group_new(tor_libevent_get_base(), bucket_cfg); - } else { - bufferevent_rate_limit_group_set_cfg(global_rate_limit, bucket_cfg); - } - ev_token_bucket_cfg_free(bucket_cfg); -} - -void -connection_get_rate_limit_totals(uint64_t *read_out, uint64_t *written_out) -{ - if (global_rate_limit == NULL) { - *read_out = *written_out = 0; - } else { - bufferevent_rate_limit_group_get_totals( - global_rate_limit, read_out, written_out); - } -} - -/** Perform whatever operations are needed on <b>conn</b> to enable - * rate-limiting. */ -void -connection_enable_rate_limiting(connection_t *conn) -{ - if (conn->bufev) { - if (!global_rate_limit) - connection_bucket_init(); - tor_add_bufferevent_to_rate_limit_group(conn->bufev, global_rate_limit); - } -} - -static void -connection_consider_empty_write_buckets(connection_t *conn) -{ - (void) conn; -} -static void -connection_consider_empty_read_buckets(connection_t *conn) -{ - (void) conn; -} -#endif /** Read bytes from conn-\>s and process them. * @@ -3644,7 +3595,7 @@ connection_read_to_buf(connection_t *conn, ssize_t *max_to_read, * take us over our read allotment, but really we shouldn't be * believing that SSL bytes are the same as TCP bytes anyway. */ int r2 = read_to_buf_tls(or_conn->tls, pending, conn->inbuf); - if (r2<0) { + if (BUG(r2<0)) { log_warn(LD_BUG, "apparently, reading pending bytes can fail."); return -1; } @@ -3741,171 +3692,11 @@ connection_read_to_buf(connection_t *conn, ssize_t *max_to_read, return 0; } -#ifdef USE_BUFFEREVENTS -/* XXXX These generic versions could be simplified by making them - type-specific */ - -/** Callback: Invoked whenever bytes are added to or drained from an input - * evbuffer. Used to track the number of bytes read. */ -static void -evbuffer_inbuf_callback(struct evbuffer *buf, - const struct evbuffer_cb_info *info, void *arg) -{ - connection_t *conn = arg; - (void) buf; - /* XXXX These need to get real counts on the non-nested TLS case. - NM */ - if (info->n_added) { - time_t now = approx_time(); - conn->timestamp_lastread = now; - record_num_bytes_transferred(conn, now, info->n_added, 0); - connection_consider_empty_read_buckets(conn); - if (conn->type == CONN_TYPE_AP) { - edge_connection_t *edge_conn = TO_EDGE_CONN(conn); - /*XXXX024 check for overflow*/ - edge_conn->n_read += (int)info->n_added; - } - } -} - -/** Callback: Invoked whenever bytes are added to or drained from an output - * evbuffer. Used to track the number of bytes written. */ -static void -evbuffer_outbuf_callback(struct evbuffer *buf, - const struct evbuffer_cb_info *info, void *arg) -{ - connection_t *conn = arg; - (void)buf; - if (info->n_deleted) { - time_t now = approx_time(); - conn->timestamp_lastwritten = now; - record_num_bytes_transferred(conn, now, 0, info->n_deleted); - connection_consider_empty_write_buckets(conn); - if (conn->type == CONN_TYPE_AP) { - edge_connection_t *edge_conn = TO_EDGE_CONN(conn); - /*XXXX024 check for overflow*/ - edge_conn->n_written += (int)info->n_deleted; - } - } -} - -/** Callback: invoked whenever a bufferevent has read data. */ -void -connection_handle_read_cb(struct bufferevent *bufev, void *arg) -{ - connection_t *conn = arg; - (void) bufev; - if (!conn->marked_for_close) { - if (connection_process_inbuf(conn, 1)<0) /* XXXX Always 1? */ - if (!conn->marked_for_close) - connection_mark_for_close(conn); - } -} - -/** Callback: invoked whenever a bufferevent has written data. */ -void -connection_handle_write_cb(struct bufferevent *bufev, void *arg) -{ - connection_t *conn = arg; - struct evbuffer *output; - if (connection_flushed_some(conn)<0) { - if (!conn->marked_for_close) - connection_mark_for_close(conn); - return; - } - - output = bufferevent_get_output(bufev); - if (!evbuffer_get_length(output)) { - connection_finished_flushing(conn); - if (conn->marked_for_close && conn->hold_open_until_flushed) { - conn->hold_open_until_flushed = 0; - if (conn->linked) { - /* send eof */ - bufferevent_flush(conn->bufev, EV_WRITE, BEV_FINISHED); - } - } - } -} - -/** Callback: invoked whenever a bufferevent has had an event (like a - * connection, or an eof, or an error) occur. */ -void -connection_handle_event_cb(struct bufferevent *bufev, short event, void *arg) -{ - connection_t *conn = arg; - (void) bufev; - if (conn->marked_for_close) - return; - - if (event & BEV_EVENT_CONNECTED) { - tor_assert(connection_state_is_connecting(conn)); - if (connection_finished_connecting(conn)<0) - return; - } - if (event & BEV_EVENT_EOF) { - if (!conn->marked_for_close) { - conn->inbuf_reached_eof = 1; - if (connection_reached_eof(conn)<0) - return; - } - } - if (event & BEV_EVENT_ERROR) { - int socket_error = evutil_socket_geterror(conn->s); - if (conn->type == CONN_TYPE_OR && - conn->state == OR_CONN_STATE_CONNECTING) { - connection_or_connect_failed(TO_OR_CONN(conn), - errno_to_orconn_end_reason(socket_error), - tor_socket_strerror(socket_error)); - } else if (CONN_IS_EDGE(conn)) { - edge_connection_t *edge_conn = TO_EDGE_CONN(conn); - if (!edge_conn->edge_has_sent_end) - connection_edge_end_errno(edge_conn); - if (conn->type == CONN_TYPE_AP && TO_ENTRY_CONN(conn)->socks_request) { - /* broken, don't send a socks reply back */ - TO_ENTRY_CONN(conn)->socks_request->has_finished = 1; - } - } - connection_close_immediate(conn); /* Connection is dead. */ - if (!conn->marked_for_close) - connection_mark_for_close(conn); - } -} - -/** Set up the generic callbacks for the bufferevent on <b>conn</b>. */ -void -connection_configure_bufferevent_callbacks(connection_t *conn) -{ - struct bufferevent *bufev; - struct evbuffer *input, *output; - tor_assert(conn->bufev); - bufev = conn->bufev; - bufferevent_setcb(bufev, - connection_handle_read_cb, - connection_handle_write_cb, - connection_handle_event_cb, - conn); - /* Set a fairly high write low-watermark so that we get the write callback - called whenever data is written to bring us under 128K. Leave the - high-watermark at 0. - */ - bufferevent_setwatermark(bufev, EV_WRITE, 128*1024, 0); - - input = bufferevent_get_input(bufev); - output = bufferevent_get_output(bufev); - evbuffer_add_cb(input, evbuffer_inbuf_callback, conn); - evbuffer_add_cb(output, evbuffer_outbuf_callback, conn); -} -#endif - /** A pass-through to fetch_from_buf. */ int connection_fetch_from_buf(char *string, size_t len, connection_t *conn) { - IF_HAS_BUFFEREVENT(conn, { - /* XXX overflow -seb */ - return (int)bufferevent_read(conn->bufev, string, len); - }) ELSE_IF_NO_BUFFEREVENT { - return fetch_from_buf(string, len, conn->inbuf); - } + return fetch_from_buf(string, len, conn->inbuf); } /** As fetch_from_buf_line(), but read from a connection's input buffer. */ @@ -3913,43 +3704,19 @@ int connection_fetch_from_buf_line(connection_t *conn, char *data, size_t *data_len) { - IF_HAS_BUFFEREVENT(conn, { - int r; - size_t eol_len=0; - struct evbuffer *input = bufferevent_get_input(conn->bufev); - struct evbuffer_ptr ptr = - evbuffer_search_eol(input, NULL, &eol_len, EVBUFFER_EOL_LF); - if (ptr.pos == -1) - return 0; /* No EOL found. */ - if ((size_t)ptr.pos+eol_len >= *data_len) { - return -1; /* Too long */ - } - *data_len = ptr.pos+eol_len; - r = evbuffer_remove(input, data, ptr.pos+eol_len); - tor_assert(r >= 0); - data[ptr.pos+eol_len] = '\0'; - return 1; - }) ELSE_IF_NO_BUFFEREVENT { - return fetch_from_buf_line(conn->inbuf, data, data_len); - } + return fetch_from_buf_line(conn->inbuf, data, data_len); } -/** As fetch_from_buf_http, but fetches from a connection's input buffer_t or - * its bufferevent as appropriate. */ +/** As fetch_from_buf_http, but fetches from a connection's input buffer_t as + * appropriate. */ int connection_fetch_from_buf_http(connection_t *conn, char **headers_out, size_t max_headerlen, char **body_out, size_t *body_used, size_t max_bodylen, int force_complete) { - IF_HAS_BUFFEREVENT(conn, { - struct evbuffer *input = bufferevent_get_input(conn->bufev); - return fetch_from_evbuffer_http(input, headers_out, max_headerlen, - body_out, body_used, max_bodylen, force_complete); - }) ELSE_IF_NO_BUFFEREVENT { - return fetch_from_buf_http(conn->inbuf, headers_out, max_headerlen, - body_out, body_used, max_bodylen, force_complete); - } + return fetch_from_buf_http(conn->inbuf, headers_out, max_headerlen, + body_out, body_used, max_bodylen, force_complete); } /** Return conn-\>outbuf_flushlen: how many bytes conn wants to flush @@ -4141,7 +3908,7 @@ connection_handle_write_impl(connection_t *conn, int force) or_conn->bytes_xmitted += result; or_conn->bytes_xmitted_by_tls += n_written; /* So we notice bytes were written even on error */ - /* XXXX024 This cast is safe since we can never write INT_MAX bytes in a + /* XXXX This cast is safe since we can never write INT_MAX bytes in a * single set of TLS operations. But it looks kinda ugly. If we refactor * the *_buf_tls functions, we should make them return ssize_t or size_t * or something. */ @@ -4253,7 +4020,7 @@ connection_handle_write(connection_t *conn, int force) * Try to flush data that's waiting for a write on <b>conn</b>. Return * -1 on failure, 0 on success. * - * Don't use this function for regular writing; the buffers/bufferevents + * Don't use this function for regular writing; the buffers * system should be good enough at scheduling writes there. Instead, this * function is for cases when we're about to exit or something and we want * to report it right away. @@ -4261,10 +4028,6 @@ connection_handle_write(connection_t *conn, int force) int connection_flush(connection_t *conn) { - IF_HAS_BUFFEREVENT(conn, { - int r = bufferevent_flush(conn->bufev, EV_WRITE, BEV_FLUSH); - return (r < 0) ? -1 : 0; - }); return connection_handle_write(conn, 1); } @@ -4293,22 +4056,6 @@ connection_write_to_buf_impl_,(const char *string, size_t len, if (conn->marked_for_close && !conn->hold_open_until_flushed) return; - IF_HAS_BUFFEREVENT(conn, { - if (zlib) { - int done = zlib < 0; - r = write_to_evbuffer_zlib(bufferevent_get_output(conn->bufev), - TO_DIR_CONN(conn)->zlib_state, - string, len, done); - } else { - r = bufferevent_write(conn->bufev, string, len); - } - if (r < 0) { - /* XXXX mark for close? */ - log_warn(LD_NET, "bufferevent_write failed! That shouldn't happen."); - } - return; - }); - old_datalen = buf_datalen(conn->outbuf); if (zlib) { dir_connection_t *dir_conn = TO_DIR_CONN(conn); @@ -4429,12 +4176,12 @@ connection_get_by_type_state_rendquery(int type, int state, (type == CONN_TYPE_DIR && TO_DIR_CONN(conn)->rend_data && !rend_cmp_service_ids(rendquery, - TO_DIR_CONN(conn)->rend_data->onion_address)) + rend_data_get_address(TO_DIR_CONN(conn)->rend_data))) || (CONN_IS_EDGE(conn) && TO_EDGE_CONN(conn)->rend_data && !rend_cmp_service_ids(rendquery, - TO_EDGE_CONN(conn)->rend_data->onion_address)) + rend_data_get_address(TO_EDGE_CONN(conn)->rend_data))) )); } @@ -4754,7 +4501,7 @@ connection_flushed_some(connection_t *conn) } /** We just finished flushing bytes to the appropriately low network layer, - * and there are no more bytes remaining in conn-\>outbuf, conn-\>bev, or + * and there are no more bytes remaining in conn-\>outbuf or * conn-\>tls to be flushed. * * This function just passes conn to the connection-specific @@ -4771,8 +4518,7 @@ connection_finished_flushing(connection_t *conn) // log_fn(LOG_DEBUG,"entered. Socket %u.", conn->s); - IF_HAS_NO_BUFFEREVENT(conn) - connection_stop_writing(conn); + connection_stop_writing(conn); switch (conn->type) { case CONN_TYPE_OR: @@ -4848,6 +4594,256 @@ connection_reached_eof(connection_t *conn) } } +/** Comparator for the two-orconn case in OOS victim sort */ +static int +oos_victim_comparator_for_orconns(or_connection_t *a, or_connection_t *b) +{ + int a_circs, b_circs; + /* Fewer circuits == higher priority for OOS kill, sort earlier */ + + a_circs = connection_or_get_num_circuits(a); + b_circs = connection_or_get_num_circuits(b); + + if (a_circs < b_circs) return 1; + else if (a_circs > b_circs) return -1; + else return 0; +} + +/** Sort comparator for OOS victims; better targets sort before worse + * ones. */ +static int +oos_victim_comparator(const void **a_v, const void **b_v) +{ + connection_t *a = NULL, *b = NULL; + + /* Get connection pointers out */ + + a = (connection_t *)(*a_v); + b = (connection_t *)(*b_v); + + tor_assert(a != NULL); + tor_assert(b != NULL); + + /* + * We always prefer orconns as victims currently; we won't even see + * these non-orconn cases, but if we do, sort them after orconns. + */ + if (a->type == CONN_TYPE_OR && b->type == CONN_TYPE_OR) { + return oos_victim_comparator_for_orconns(TO_OR_CONN(a), TO_OR_CONN(b)); + } else { + /* + * One isn't an orconn; if one is, it goes first. We currently have no + * opinions about cases where neither is an orconn. + */ + if (a->type == CONN_TYPE_OR) return -1; + else if (b->type == CONN_TYPE_OR) return 1; + else return 0; + } +} + +/** Pick n victim connections for the OOS handler and return them in a + * smartlist. + */ +MOCK_IMPL(STATIC smartlist_t *, +pick_oos_victims, (int n)) +{ + smartlist_t *eligible = NULL, *victims = NULL; + smartlist_t *conns; + int conn_counts_by_type[CONN_TYPE_MAX_ + 1], i; + + /* + * Big damn assumption (someone improve this someday!): + * + * Socket exhaustion normally happens on high-volume relays, and so + * most of the connections involved are orconns. We should pick victims + * by assembling a list of all orconns, and sorting them in order of + * how much 'damage' by some metric we'd be doing by dropping them. + * + * If we move on from orconns, we should probably think about incoming + * directory connections next, or exit connections. Things we should + * probably never kill are controller connections and listeners. + * + * This function will count how many connections of different types + * exist and log it for purposes of gathering data on typical OOS + * situations to guide future improvements. + */ + + /* First, get the connection array */ + conns = get_connection_array(); + /* + * Iterate it and pick out eligible connection types, and log some stats + * along the way. + */ + eligible = smartlist_new(); + memset(conn_counts_by_type, 0, sizeof(conn_counts_by_type)); + SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) { + /* Bump the counter */ + tor_assert(c->type <= CONN_TYPE_MAX_); + ++(conn_counts_by_type[c->type]); + + /* Skip anything without a socket we can free */ + if (!(SOCKET_OK(c->s))) { + continue; + } + + /* Skip anything we would count as moribund */ + if (connection_is_moribund(c)) { + continue; + } + + switch (c->type) { + case CONN_TYPE_OR: + /* We've got an orconn, it's eligible to be OOSed */ + smartlist_add(eligible, c); + break; + default: + /* We don't know what to do with it, ignore it */ + break; + } + } SMARTLIST_FOREACH_END(c); + + /* Log some stats */ + if (smartlist_len(conns) > 0) { + /* At least one counter must be non-zero */ + log_info(LD_NET, "Some stats on conn types seen during OOS follow"); + for (i = CONN_TYPE_MIN_; i <= CONN_TYPE_MAX_; ++i) { + /* Did we see any? */ + if (conn_counts_by_type[i] > 0) { + log_info(LD_NET, "%s: %d conns", + conn_type_to_string(i), + conn_counts_by_type[i]); + } + } + log_info(LD_NET, "Done with OOS conn type stats"); + } + + /* Did we find more eligible targets than we want to kill? */ + if (smartlist_len(eligible) > n) { + /* Sort the list in order of target preference */ + smartlist_sort(eligible, oos_victim_comparator); + /* Pick first n as victims */ + victims = smartlist_new(); + for (i = 0; i < n; ++i) { + smartlist_add(victims, smartlist_get(eligible, i)); + } + /* Free the original list */ + smartlist_free(eligible); + } else { + /* No, we can just call them all victims */ + victims = eligible; + } + + return victims; +} + +/** Kill a list of connections for the OOS handler. */ +MOCK_IMPL(STATIC void, +kill_conn_list_for_oos, (smartlist_t *conns)) +{ + if (!conns) return; + + SMARTLIST_FOREACH_BEGIN(conns, connection_t *, c) { + /* Make sure the channel layer gets told about orconns */ + if (c->type == CONN_TYPE_OR) { + connection_or_close_for_error(TO_OR_CONN(c), 1); + } else { + connection_mark_for_close(c); + } + } SMARTLIST_FOREACH_END(c); + + log_notice(LD_NET, + "OOS handler marked %d connections", + smartlist_len(conns)); +} + +/** Out-of-Sockets handler; n_socks is the current number of open + * sockets, and failed is non-zero if a socket exhaustion related + * error immediately preceded this call. This is where to do + * circuit-killing heuristics as needed. + */ +void +connection_check_oos(int n_socks, int failed) +{ + int target_n_socks = 0, moribund_socks, socks_to_kill; + smartlist_t *conns; + + /* Early exit: is OOS checking disabled? */ + if (get_options()->DisableOOSCheck) { + return; + } + + /* Sanity-check args */ + tor_assert(n_socks >= 0); + + /* + * Make some log noise; keep it at debug level since this gets a chance + * to run on every connection attempt. + */ + log_debug(LD_NET, + "Running the OOS handler (%d open sockets, %s)", + n_socks, (failed != 0) ? "exhaustion seen" : "no exhaustion"); + + /* + * Check if we're really handling an OOS condition, and if so decide how + * many sockets we want to get down to. Be sure we check if the threshold + * is distinct from zero first; it's possible for this to be called a few + * times before we've finished reading the config. + */ + if (n_socks >= get_options()->ConnLimit_high_thresh && + get_options()->ConnLimit_high_thresh != 0 && + get_options()->ConnLimit_ != 0) { + /* Try to get down to the low threshold */ + target_n_socks = get_options()->ConnLimit_low_thresh; + log_notice(LD_NET, + "Current number of sockets %d is greater than configured " + "limit %d; OOS handler trying to get down to %d", + n_socks, get_options()->ConnLimit_high_thresh, + target_n_socks); + } else if (failed) { + /* + * If we're not at the limit but we hit a socket exhaustion error, try to + * drop some (but not as aggressively as ConnLimit_low_threshold, which is + * 3/4 of ConnLimit_) + */ + target_n_socks = (n_socks * 9) / 10; + log_notice(LD_NET, + "We saw socket exhaustion at %d open sockets; OOS handler " + "trying to get down to %d", + n_socks, target_n_socks); + } + + if (target_n_socks > 0) { + /* + * It's an OOS! + * + * Count moribund sockets; it's be important that anything we decide + * to get rid of here but don't immediately close get counted as moribund + * on subsequent invocations so we don't try to kill too many things if + * connection_check_oos() gets called multiple times. + */ + moribund_socks = connection_count_moribund(); + + if (moribund_socks < n_socks - target_n_socks) { + socks_to_kill = n_socks - target_n_socks - moribund_socks; + + conns = pick_oos_victims(socks_to_kill); + if (conns) { + kill_conn_list_for_oos(conns); + log_notice(LD_NET, + "OOS handler killed %d conns", smartlist_len(conns)); + smartlist_free(conns); + } else { + log_notice(LD_NET, "OOS handler failed to pick any victim conns"); + } + } else { + log_notice(LD_NET, + "Not killing any sockets for OOS because there are %d " + "already moribund, and we only want to eliminate %d", + moribund_socks, n_socks - target_n_socks); + } + } +} + /** Log how many bytes are used by buffers of different kinds and sizes. */ void connection_dump_buffer_mem_stats(int severity) @@ -4906,15 +4902,6 @@ assert_connection_ok(connection_t *conn, time_t now) tor_assert(conn->type >= CONN_TYPE_MIN_); tor_assert(conn->type <= CONN_TYPE_MAX_); -#ifdef USE_BUFFEREVENTS - if (conn->bufev) { - tor_assert(conn->read_event == NULL); - tor_assert(conn->write_event == NULL); - tor_assert(conn->inbuf == NULL); - tor_assert(conn->outbuf == NULL); - } -#endif - switch (conn->type) { case CONN_TYPE_OR: case CONN_TYPE_EXT_OR: @@ -5178,11 +5165,6 @@ connection_free_all(void) tor_free(last_interface_ipv4); tor_free(last_interface_ipv6); - -#ifdef USE_BUFFEREVENTS - if (global_rate_limit) - bufferevent_rate_limit_group_free(global_rate_limit); -#endif } /** Log a warning, and possibly emit a control event, that <b>received</b> came |