diff options
author | Nick Mathewson <nickm@torproject.org> | 2005-01-12 06:42:32 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2005-01-12 06:42:32 +0000 |
commit | 324b192f68bfe697009831a5bad3acdd7bd2cec5 (patch) | |
tree | 13f8d3bd453244fcad1f519667bb4483156113bf | |
parent | 9b578f2fe2665a1b1a75f6fb5a85f8a77ee31545 (diff) | |
download | tor-324b192f68bfe697009831a5bad3acdd7bd2cec5.tar.gz tor-324b192f68bfe697009831a5bad3acdd7bd2cec5.zip |
Make Tor use Niels Provos's libevent instead of it's current
poll-but-sometimes-select mess. This will let us use faster async cores
(like epoll, kpoll, and /dev/poll), and hopefully work better on Windows
too.
There are some fairly nasty changes to main.c here; this will almost
certainly break something. But hey, that's what alphas are for.
svn:r3341
-rw-r--r-- | configure.in | 5 | ||||
-rw-r--r-- | doc/TODO | 18 | ||||
-rw-r--r-- | src/common/Makefile.am | 4 | ||||
-rw-r--r-- | src/common/compat.c | 1 | ||||
-rw-r--r-- | src/common/compat.h | 4 | ||||
-rw-r--r-- | src/common/fakepoll.c | 108 | ||||
-rw-r--r-- | src/common/fakepoll.h | 73 | ||||
-rw-r--r-- | src/or/config.c | 3 | ||||
-rw-r--r-- | src/or/connection.c | 25 | ||||
-rw-r--r-- | src/or/connection_edge.c | 6 | ||||
-rw-r--r-- | src/or/connection_or.c | 4 | ||||
-rw-r--r-- | src/or/directory.c | 4 | ||||
-rw-r--r-- | src/or/main.c | 492 | ||||
-rw-r--r-- | src/or/or.h | 11 |
14 files changed, 355 insertions, 403 deletions
diff --git a/configure.in b/configure.in index 05cd361ac4..5a7022417d 100644 --- a/configure.in +++ b/configure.in @@ -32,6 +32,7 @@ AC_ARG_WITH(ssl-dir, AC_SEARCH_LIBS(socket, [socket]) AC_SEARCH_LIBS(gethostbyname, [nsl]) +AC_SEARCH_LIBS(event_loop, [event], , AC_MSG_ERROR(Libevent library not found. Tor requires libevent to build.)) saved_LIBS="$LIBS" saved_LDFLAGS="$LDFLAGS" @@ -139,7 +140,9 @@ AC_SYS_LARGEFILE dnl The warning message here is no longer strictly accurate. -AC_CHECK_HEADERS(unistd.h string.h signal.h netdb.h ctype.h poll.h sys/stat.h sys/poll.h sys/types.h fcntl.h sys/fcntl.h sys/ioctl.h sys/socket.h sys/time.h netinet/in.h arpa/inet.h errno.h assert.h time.h pwd.h grp.h zlib.h, , AC_MSG_WARN(some headers were not found, compilation may fail)) +AC_CHECK_HEADERS(unistd.h string.h signal.h netdb.h ctype.h sys/stat.h sys/types.h fcntl.h sys/fcntl.h sys/ioctl.h sys/socket.h sys/time.h netinet/in.h arpa/inet.h errno.h assert.h time.h pwd.h grp.h zlib.h, , AC_MSG_WARN(some headers were not found, compilation may fail)) + +AC_CHECK_HEADERS(event.h, , AC_MSG_ERROR(Libevent header (event.h) not found. Tor requires libevent to build.)) dnl These headers are not essential @@ -70,7 +70,7 @@ Tier one: - Windows N - Make millisecond accuracy work on win32 - - Switch to WSA*Event code as a better poll replacement. Or maybe just + X Switch to WSA*Event code as a better poll replacement. Or maybe just do libevent? - Code cleanup @@ -101,10 +101,24 @@ Tier two: - Limit number of circuits that we preemptively generate based on past behavior; use same limits in circuit_expire_old_circuits(). - Write limiting; configurable token buckets. - - Switch to libevent? Evaluate it first. - Make it harder to circumvent bandwidth caps: look at number of bytes sent across sockets, not number sent inside TLS stream. + . Switch to libevent + o Evaluate libevent + o Convert socket handling + o Convert signal handling + o Convert timers + o Update configure.in + o Remove fakepoll + - Hold-open-until-flushed now works by accident; it should work by + design. + - The logic for reading from TLS sockets is likely to overrun the + bandwidth buckets under heavy load. (Really, the logic was + never right in the first place.) Also, we should audit all users + of get_pending_bytes(). + - Make sure it works on more platforms. + - Find a way to make sure we have libevent 1.0 or later. - QOI - Let more config options (e.g. ORPort) change dynamically. diff --git a/src/common/Makefile.am b/src/common/Makefile.am index 65f722bfe8..ae3f8a8f7f 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -3,7 +3,7 @@ noinst_LIBRARIES = libor.a libor-crypto.a #CFLAGS = -Wall -Wpointer-arith -O2 -libor_a_SOURCES = log.c fakepoll.c util.c compat.c container.c +libor_a_SOURCES = log.c util.c compat.c container.c libor_crypto_a_SOURCES = crypto.c aes.c tortls.c torgzip.c -noinst_HEADERS = log.h crypto.h fakepoll.h test.h util.h compat.h aes.h torint.h tortls.h strlcpy.c strlcat.c torgzip.h container.h +noinst_HEADERS = log.h crypto.h test.h util.h compat.h aes.h torint.h tortls.h strlcpy.c strlcat.c torgzip.h container.h diff --git a/src/common/compat.c b/src/common/compat.c index e9f0b23371..69b82f8a41 100644 --- a/src/common/compat.c +++ b/src/common/compat.c @@ -8,7 +8,6 @@ const char compat_c_id[] = "$Id$"; #define _GNU_SOURCE #include "orconfig.h" -#include "fakepoll.h" #include "compat.h" #ifdef MS_WINDOWS diff --git a/src/common/compat.h b/src/common/compat.h index d9cb5669d6..186416d882 100644 --- a/src/common/compat.h +++ b/src/common/compat.h @@ -115,6 +115,10 @@ int replace_file(const char *from, const char *to); #define tor_close_socket(s) close(s) #endif +/* Now that we use libevent, all real sockets are safe for polling ... or + * if they aren't, libevent will help us. */ +#define SOCKET_IS_POLLABLE(fd) ((fd)>=0) + struct in_addr; int tor_inet_aton(const char *cp, struct in_addr *addr); int tor_lookup_hostname(const char *name, uint32_t *addr); diff --git a/src/common/fakepoll.c b/src/common/fakepoll.c deleted file mode 100644 index 19f5c1de31..0000000000 --- a/src/common/fakepoll.c +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright 2002,2003 Nick Mathewson, Roger Dingledine */ -/* See LICENSE for licensing information */ -/* $Id$ */ -const char fakepoll_c_id[] = "$Id$"; - -/** - * \file fakepoll.c - * - * \brief On systems where poll() doesn't exist, fake it with select(). - **/ - -#include "orconfig.h" -#include "fakepoll.h" - -#ifdef HAVE_SYS_TYPES_H -#include <sys/types.h> -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#ifdef HAVE_STRING_H -#include <string.h> -#endif -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif - -#include <assert.h> -#include <stdlib.h> -#include "util.h" -#include "log.h" - -#ifndef USE_FAKE_POLL -int -tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout) -{ - unsigned int i; - for (i=0;i<nfds;++i) { - tor_assert(ufds[i].fd >= 0); - } - return poll(ufds,nfds,timeout); -} -#else -int -tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout) -{ - unsigned int idx; - int maxfd, fd; - int r; -#ifdef MS_WINDOWS - int any_fds_set = 0; -#endif - fd_set readfds, writefds, exceptfds; -#ifdef USING_FAKE_TIMEVAL -#undef timeval -#undef tv_sec -#undef tv_usec -#endif - struct timeval _timeout; - _timeout.tv_sec = timeout/1000; - _timeout.tv_usec = (timeout%1000)*1000; - FD_ZERO(&readfds); - FD_ZERO(&writefds); - FD_ZERO(&exceptfds); - - maxfd = -1; - for (idx = 0; idx < nfds; ++idx) { - ufds[idx].revents = 0; - fd = ufds[idx].fd; - tor_assert(SOCKET_IS_POLLABLE(fd)); - if (fd > maxfd) { - maxfd = fd; -#ifdef MS_WINDOWS - any_fds_set = 1; -#endif - } - if (ufds[idx].events & POLLIN) - FD_SET(fd, &readfds); - if (ufds[idx].events & POLLOUT) - FD_SET(fd, &writefds); - FD_SET(fd, &exceptfds); - } -#ifdef MS_WINDOWS - if (!any_fds_set) { - Sleep(timeout); - return 0; - } -#endif - r = select(maxfd+1, &readfds, &writefds, &exceptfds, - timeout == -1 ? NULL : &_timeout); - if (r <= 0) - return r; - r = 0; - for (idx = 0; idx < nfds; ++idx) { - fd = ufds[idx].fd; - if (FD_ISSET(fd, &readfds)) - ufds[idx].revents |= POLLIN; - if (FD_ISSET(fd, &writefds)) - ufds[idx].revents |= POLLOUT; - if (FD_ISSET(fd, &exceptfds)) - ufds[idx].revents |= POLLERR; - if (ufds[idx].revents) - ++r; - } - return r; -} -#endif - diff --git a/src/common/fakepoll.h b/src/common/fakepoll.h deleted file mode 100644 index 3f3d1610bb..0000000000 --- a/src/common/fakepoll.h +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright 2002,2003 Nick Mathewson, Roger Dingledine. */ -/* See LICENSE for licensing information */ -/* $Id$ */ - -#ifndef __FAKEPOLL_H -#define __FAKEPOLL_H -#define FAKEPOLL_H_ID "$Id$" - -/** - * \file fakepoll.h - * \brief Headers for fakepoll.c - */ - -#include "orconfig.h" - -#define POLL_NO_WARN - -#if defined(HAVE_POLL_H) -#include <poll.h> -#elif defined(HAVE_SYS_POLL_H) -#include <sys/poll.h> -#endif - -/* If _POLL_EMUL_H_ is defined, then poll is just a just a thin wrapper around - * select. On Mac OS 10.3, this wrapper is kinda flaky, and we should - * use our own. - */ -#if !(defined(HAVE_POLL_H)||defined(HAVE_SYS_POLL_H))&&!defined(_POLL_EMUL_H_) -#define USE_FAKE_POLL -#endif - -#if defined USE_FAKE_POLL && !defined(_POLL_EMUL_H_) -struct pollfd { - int fd; - short events; - short revents; -}; - -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 -#endif - -#ifdef MS_WINDOWS -#define MAXCONNECTIONS 10000 /* XXXX copied from or.h */ -/* This trick makes winsock resize fd_set, which defaults to the insanely low - * 64. */ -#define FD_SETSIZE MAXCONNECTIONS -/* XXXX But Windows FD_SET and FD_CLR are tremendously ugly, and linear in - * the total number of sockets set! Perhaps we should eventually use - * WSAEventSelect and WSAWaitForMultipleEvents instead of select? */ -#endif - -#if defined(MS_WINDOWS) || ! defined(USE_FAKE_POLL) -/* If we're using poll, we can poll as many sockets as we want. - * If we're on Windows, having too many sockets is harmless, since - * select stupidly uses an array of sockets rather than a bitfield. */ -#define SOCKET_IS_POLLABLE(fd) ((fd) >= 0) -#else -/* If we're using a real Posix select, then in order to be pollable, a socket - * must - * a) be valid (>= 0) - * b) be < FD_SETSIZE. - */ -#define SOCKET_IS_POLLABLE(fd) ((fd) >= 0 && (fd) < FD_SETSIZE) -#endif - -int tor_poll(struct pollfd *ufds, unsigned int nfds, int timeout); - -#endif diff --git a/src/or/config.c b/src/or/config.c index 6e9ca1d71e..84d8200097 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -2435,7 +2435,6 @@ print_cvs_version(void) extern const char compat_c_id[]; extern const char container_c_id[]; extern const char crypto_c_id[]; - extern const char fakepoll_c_id[]; extern const char log_c_id[]; extern const char torgzip_c_id[]; extern const char tortls_c_id[]; @@ -2472,7 +2471,6 @@ print_cvs_version(void) puts(COMPAT_H_ID); puts(CONTAINER_H_ID); puts(CRYPTO_H_ID); - puts(FAKEPOLL_H_ID); puts(LOG_H_ID); puts(TORGZIP_H_ID); puts(TORINT_H_ID); @@ -2482,7 +2480,6 @@ print_cvs_version(void) puts(compat_c_id); puts(container_c_id); puts(crypto_c_id); - puts(fakepoll_c_id); puts(log_c_id); puts(torgzip_c_id); puts(tortls_c_id); diff --git a/src/or/connection.c b/src/or/connection.c index 86a89f3cb0..6a39cb862c 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -167,6 +167,14 @@ void connection_free(connection_t *conn) { log_fn(LOG_INFO,"closing fd %d.",conn->s); tor_close_socket(conn->s); } + if (conn->read_event) { + event_del(conn->read_event); + tor_free(conn->read_event); + } + if (conn->write_event) { + event_del(conn->write_event); + tor_free(conn->write_event); + } memset(conn, 0xAA, sizeof(connection_t)); /* poison memory */ tor_free(conn); } @@ -300,6 +308,7 @@ _connection_mark_for_close(connection_t *conn) } conn->marked_for_close = 1; + add_connection_to_closeable_list(conn); /* in case we're going to be held-open-til-flushed, reset * the number of seconds since last successful write, so @@ -904,6 +913,7 @@ static int connection_read_to_buf(connection_t *conn, int *max_to_read) { } if (connection_speaks_cells(conn) && conn->state != OR_CONN_STATE_CONNECTING) { + int pending; if (conn->state == OR_CONN_STATE_HANDSHAKING) { /* continue handshaking even if global token bucket is empty */ return connection_tls_continue_handshake(conn); @@ -931,7 +941,22 @@ static int connection_read_to_buf(connection_t *conn, int *max_to_read) { case TOR_TLS_DONE: /* no data read, so nothing to process */ result = 0; break; /* so we call bucket_decrement below */ + default: + break; + } + pending = tor_tls_get_pending_bytes(conn->tls); + if (pending) { + /* XXXX If we have any pending bytes, read them now. This *can* + * take us over our read alotment, but really we shouldn't be + * believing that SSL bytes are the same as TCP bytes anyway. */ + int r2 = read_to_buf_tls(conn->tls, pending, conn->inbuf); + if (r2<0) { + log_fn(LOG_WARN, "Bug: apparently, reading pending bytes can fail."); + } else { + result += r2; + } } + } else { result = read_to_buf(conn->s, at_most, conn->inbuf, &conn->inbuf_reached_eof); diff --git a/src/or/connection_edge.c b/src/or/connection_edge.c index 0c7acca3ab..5cbb312d21 100644 --- a/src/or/connection_edge.c +++ b/src/or/connection_edge.c @@ -203,7 +203,7 @@ int connection_edge_finished_connecting(connection_t *conn) conn->address,conn->port); conn->state = EXIT_CONN_STATE_OPEN; - connection_watch_events(conn, POLLIN); /* stop writing, continue reading */ + connection_watch_events(conn, EV_READ); /* stop writing, continue reading */ if (connection_wants_to_flush(conn)) /* in case there are any queued relay cells */ connection_start_writing(conn); /* deliver a 'connected' relay cell back through the circuit. */ @@ -949,7 +949,7 @@ connection_exit_connect(connection_t *conn) { case 0: conn->state = EXIT_CONN_STATE_CONNECTING; - connection_watch_events(conn, POLLOUT | POLLIN | POLLERR); + connection_watch_events(conn, EV_WRITE | EV_READ); /* writable indicates finish, readable indicates broken link, error indicates broken link in windowsland. */ return; @@ -961,7 +961,7 @@ connection_exit_connect(connection_t *conn) { log_fn(LOG_WARN,"Bug: newly connected conn had data waiting!"); // connection_start_writing(conn); } - connection_watch_events(conn, POLLIN); + connection_watch_events(conn, EV_READ); /* also, deliver a 'connected' cell back through the circuit. */ if (connection_edge_is_rendezvous_stream(conn)) { /* rendezvous stream */ diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 2fed1f89a7..f1ec0fcf95 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -238,7 +238,7 @@ connection_t *connection_or_connect(uint32_t addr, uint16_t port, connection_free(conn); return NULL; case 0: - connection_watch_events(conn, POLLIN | POLLOUT | POLLERR); + connection_watch_events(conn, EV_READ | EV_WRITE); /* writable indicates finish, readable indicates broken link, error indicates broken link on windows */ return conn; @@ -342,7 +342,7 @@ connection_tls_finish_handshake(connection_t *conn) { or_options_t *options = get_options(); conn->state = OR_CONN_STATE_OPEN; - connection_watch_events(conn, POLLIN); + connection_watch_events(conn, EV_READ); log_fn(LOG_DEBUG,"tls handshake done. verifying."); if (! tor_tls_peer_has_cert(conn->tls)) { /* It's an old OP. */ if (server_mode(options)) { /* I'm an OR; good. */ diff --git a/src/or/directory.c b/src/or/directory.c index 1b65505916..b4c5838c8e 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -321,7 +321,7 @@ directory_initiate_command(const char *address, uint32_t addr, /* queue the command on the outbuf */ directory_send_command(conn, platform, purpose, resource, payload, payload_len); - connection_watch_events(conn, POLLIN | POLLOUT | POLLERR); + connection_watch_events(conn, EV_READ | EV_WRITE); /* writable indicates finish, readable indicates broken link, error indicates broken link in windowsland. */ } @@ -342,7 +342,7 @@ directory_initiate_command(const char *address, uint32_t addr, /* queue the command on the outbuf */ directory_send_command(conn, platform, purpose, resource, payload, payload_len); - connection_watch_events(conn, POLLIN | POLLOUT | POLLERR); + connection_watch_events(conn, EV_READ | EV_WRITE); } } diff --git a/src/or/main.c b/src/or/main.c index f83c26381a..d0c37815bd 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -12,9 +12,31 @@ const char main_c_id[] = "$Id$"; #include "or.h" +/* These signals are defined to help control_singal_act work. */ +#ifndef SIGHUP +#define SIGHUP 1 +#endif +#ifndef SIGINT +#define SIGINT 2 +#endif +#ifndef SIGUSR1 +#define SIGUSR1 10 +#endif +#ifndef SIGUSR2 +#define SIGUSR2 12 +#endif +#ifndef SIGTERM +#define SIGTERM 15 +#endif + /********* PROTOTYPES **********/ static void dumpstats(int severity); /* log stats */ +static void conn_read_callback(int fd, short event, void *_conn); +static void conn_write_callback(int fd, short event, void *_conn); +static void signal_callback(int fd, short events, void *arg); +static void second_elapsed_callback(int fd, short event, void *args); +static int conn_close_if_marked(int i); /********* START VARIABLES **********/ @@ -45,22 +67,10 @@ static time_t time_to_fetch_running_routers = 0; * poll_array in the same position. The first nfds elements are valid. */ static connection_t *connection_array[MAXCONNECTIONS] = { NULL }; - -/** Array of pollfd objects for calls to poll(). */ -static struct pollfd poll_array[MAXCONNECTIONS]; +static smartlist_t *closeable_connection_lst = NULL; static int nfds=0; /**< Number of connections currently active. */ -#ifndef MS_WINDOWS /* do signal stuff only on unix */ -static int please_dumpstats=0; /**< Whether we should dump stats during the loop. */ -static int please_debug=0; /**< Whether we should switch all logs to -l debug. */ -static int please_reset=0; /**< Whether we just got a sighup. */ -static int please_reap_children=0; /**< Whether we should waitpid for exited children. */ -static int please_sigpipe=0; /**< Whether we've caught a sigpipe lately. */ -static int please_shutdown=0; /**< Whether we should slowly shut down Tor. */ -static int please_die=0; /**< Whether we should immediately shut down Tor. */ -#endif /* signal stuff */ - /** We set this to 1 when we've fetched a dir, to know whether to complain * yet about unrecognized nicknames in entrynodes, exitnodes, etc. * Also, we don't try building circuits unless this is 1. */ @@ -110,11 +120,12 @@ int connection_add(connection_t *conn) { conn->poll_index = nfds; connection_array[nfds] = conn; - poll_array[nfds].fd = conn->s; - - /* zero these out here, because otherwise we'll inherit values from the previously freed one */ - poll_array[nfds].events = 0; - poll_array[nfds].revents = 0; + conn->read_event = tor_malloc_zero(sizeof(struct event)); + conn->write_event = tor_malloc_zero(sizeof(struct event)); + event_set(conn->read_event, conn->s, EV_READ|EV_PERSIST, + conn_read_callback, conn); + event_set(conn->write_event, conn->s, EV_WRITE|EV_PERSIST, + conn_write_callback, conn); nfds++; @@ -144,17 +155,33 @@ int connection_remove(connection_t *conn) { return 0; } + if (conn->read_event) { + event_del(conn->read_event); + tor_free(conn->read_event); + } + if (conn->write_event) { + event_del(conn->write_event); + tor_free(conn->write_event); + } + /* replace this one with the one at the end */ nfds--; - poll_array[current_index].fd = poll_array[nfds].fd; - poll_array[current_index].events = poll_array[nfds].events; - poll_array[current_index].revents = poll_array[nfds].revents; connection_array[current_index] = connection_array[nfds]; connection_array[current_index]->poll_index = current_index; return 0; } +/** DOCDOC **/ +void +add_connection_to_closeable_list(connection_t *conn) +{ + tor_assert(!smartlist_isin(closeable_connection_lst, conn)); + tor_assert(conn->marked_for_close); + + smartlist_add(closeable_connection_lst, conn); +} + /** Return true iff conn is in the current poll array. */ int connection_in_array(connection_t *conn) { int i; @@ -175,67 +202,150 @@ void get_connection_array(connection_t ***array, int *n) { } /** Set the event mask on <b>conn</b> to <b>events</b>. (The form of -* the event mask is as for poll().) +* the event mask is DOCDOC) */ void connection_watch_events(connection_t *conn, short events) { - tor_assert(conn); - tor_assert(conn->poll_index >= 0); - tor_assert(conn->poll_index < nfds); + tor_assert(conn->read_event); + tor_assert(conn->write_event); + + if (events & EV_READ) { + event_add(conn->read_event, NULL); + } else { + event_del(conn->read_event); + } - poll_array[conn->poll_index].events = events; + if (events & EV_WRITE) { + event_add(conn->write_event, NULL); + } else { + event_del(conn->write_event); + } } /** Return true iff <b>conn</b> is listening for read events. */ int connection_is_reading(connection_t *conn) { tor_assert(conn); - tor_assert(conn->poll_index >= 0); - return poll_array[conn->poll_index].events & POLLIN; + + /* This isn't 100% documented, but it should work. */ + return conn->read_event && + (conn->read_event->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE)); } /** Tell the main loop to stop notifying <b>conn</b> of any read events. */ void connection_stop_reading(connection_t *conn) { tor_assert(conn); - tor_assert(conn->poll_index >= 0); - tor_assert(conn->poll_index < nfds); + tor_assert(conn->read_event); log(LOG_DEBUG,"connection_stop_reading() called."); - poll_array[conn->poll_index].events &= ~POLLIN; + event_del(conn->read_event); } /** Tell the main loop to start notifying <b>conn</b> of any read events. */ void connection_start_reading(connection_t *conn) { tor_assert(conn); - tor_assert(conn->poll_index >= 0); - tor_assert(conn->poll_index < nfds); - poll_array[conn->poll_index].events |= POLLIN; + tor_assert(conn->read_event); + + event_add(conn->read_event, NULL); } /** Return true iff <b>conn</b> is listening for write events. */ int connection_is_writing(connection_t *conn) { - return poll_array[conn->poll_index].events & POLLOUT; + tor_assert(conn); + + /* This isn't 100% documented, but it should work. */ + return conn->write_event && + (conn->write_event->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE)); } /** Tell the main loop to stop notifying <b>conn</b> of any write events. */ void connection_stop_writing(connection_t *conn) { tor_assert(conn); - tor_assert(conn->poll_index >= 0); - tor_assert(conn->poll_index < nfds); - poll_array[conn->poll_index].events &= ~POLLOUT; + tor_assert(conn->write_event); + + event_del(conn->write_event); } /** Tell the main loop to start notifying <b>conn</b> of any write events. */ void connection_start_writing(connection_t *conn) { tor_assert(conn); - tor_assert(conn->poll_index >= 0); - tor_assert(conn->poll_index < nfds); - poll_array[conn->poll_index].events |= POLLOUT; + tor_assert(conn->write_event); + + event_add(conn->write_event, NULL); } -/** Called when the connection at connection_array[i] has a read event, - * or it has pending tls data waiting to be read: checks for validity, - * catches numerous errors, and dispatches to connection_handle_read. - */ +/** DOCDOC */ +static void +close_closeable_connections(void) +{ + int i; + if (!smartlist_len(closeable_connection_lst)) + return; + + for (i = 0; i < smartlist_len(closeable_connection_lst); ) { + connection_t *conn = smartlist_get(closeable_connection_lst, i); + if (!conn_close_if_marked(conn->poll_index)) + ++i; + } +} + +/** DOCDOC */ +static void +conn_read_callback(int fd, short event, void *_conn) +{ + connection_t *conn = _conn; + if (conn->marked_for_close) + return; + + log_fn(LOG_DEBUG,"socket %d wants to read.",conn->s); + + assert_connection_ok(conn, time(NULL)); + assert_all_pending_dns_resolves_ok(); + + if (connection_handle_read(conn) < 0) { + if (!conn->marked_for_close) { +#ifndef MS_WINDOWS + log_fn(LOG_WARN,"Bug: unhandled error on read for %s connection (fd %d); removing", + CONN_TYPE_TO_STRING(conn->type), conn->s); +#endif + connection_mark_for_close(conn); + } + } + assert_connection_ok(conn, time(NULL)); + assert_all_pending_dns_resolves_ok(); + + if (smartlist_len(closeable_connection_lst)) + close_closeable_connections(); +} + +static void conn_write_callback(int fd, short events, void *_conn) +{ + connection_t *conn = _conn; + + log_fn(LOG_DEBUG,"socket %d wants to write.",conn->s); + if (conn->marked_for_close) + return; + + assert_connection_ok(conn, time(NULL)); + assert_all_pending_dns_resolves_ok(); + + if (connection_handle_write(conn) < 0) { + if (!conn->marked_for_close) { + /* this connection is broken. remove it. */ + log_fn(LOG_WARN,"Bug: unhandled error on write for %s connection (fd %d); removing", + CONN_TYPE_TO_STRING(conn->type), conn->s); + conn->has_sent_end = 1; /* otherwise we cry wolf about duplicate close */ + /* XXX do we need a close-immediate here, so we don't try to flush? */ + connection_mark_for_close(conn); + } + } + assert_connection_ok(conn, time(NULL)); + assert_all_pending_dns_resolves_ok(); + + if (smartlist_len(closeable_connection_lst)) + close_closeable_connections(); +} + +#if 0 static void conn_read(int i) { connection_t *conn = connection_array[i]; @@ -336,6 +446,7 @@ static void conn_write(int i) { assert_connection_ok(conn, time(NULL)); assert_all_pending_dns_resolves_ok(); } +#endif /** If the connection at connection_array[i] is marked for close, then: * - If it has data that it wants to flush, try to flush it. @@ -343,16 +454,16 @@ static void conn_write(int i) { * true, then leave the connection open and return. * - Otherwise, remove the connection from connection_array and from * all other lists, close it, and free it. - * If we remove the connection, then call conn_closed_if_marked at the new - * connection at position i. + * Returns 1 if the connection was closed, 0 otherwise. + * DOCDOC closeable_list */ -static void conn_close_if_marked(int i) { +static int conn_close_if_marked(int i) { connection_t *conn; int retval; conn = connection_array[i]; if (!conn->marked_for_close) - return; /* nothing to see here, move along */ + return 0; /* nothing to see here, move along */ assert_connection_ok(conn, time(NULL)); assert_all_pending_dns_resolves_ok(); @@ -378,7 +489,7 @@ static void conn_close_if_marked(int i) { conn->hold_open_until_flushed && connection_wants_to_flush(conn)) { log_fn(LOG_INFO,"Holding conn (fd %d) open for more flushing.",conn->s); /* XXX should we reset timestamp_lastwritten here? */ - return; + return 0; } if (connection_wants_to_flush(conn)) { log_fn(LOG_NOTICE,"Conn (addr %s, fd %d, type %s, state %d) is being closed, but there are still %d bytes we can't write. (Marked at %s:%d)", @@ -394,14 +505,12 @@ static void conn_close_if_marked(int i) { circuit_about_to_close_connection(conn); connection_about_to_close_connection(conn); connection_remove(conn); + smartlist_remove(closeable_connection_lst, conn); if (conn->type == CONN_TYPE_EXIT) { assert_connection_edge_not_dns_pending(conn); } connection_free(conn); - if (i<nfds) { /* we just replaced the one at i with a new one. - process it too. */ - conn_close_if_marked(i); - } + return 1; } /** We've just tried every dirserver we know about, and none of @@ -731,61 +840,65 @@ static void run_scheduled_events(time_t now) { * because if we marked a conn for close and left its socket -1, then * we'll pass it to poll/select and bad things will happen. */ - for (i=0;i<nfds;i++) - conn_close_if_marked(i); + close_closeable_connections(); } -/** Called every time we're about to call tor_poll. Increments statistics, - * and adjusts token buckets. Returns the number of milliseconds to use for - * the poll() timeout. - */ -static int prepare_for_poll(void) { - static long current_second = 0; /* from previous calls to gettimeofday */ - connection_t *conn; +/** DOCDOC */ +static void second_elapsed_callback(int fd, short event, void *args) +{ + static struct event *timeout_event = NULL; + static struct timeval one_second; + static long current_second = 0; struct timeval now; - int i; + size_t bytes_written; + size_t bytes_read; + int seconds_elapsed; + if (!timeout_event) { + timeout_event = tor_malloc_zero(sizeof(struct event)); + evtimer_set(timeout_event, second_elapsed_callback, NULL); + one_second.tv_sec = 1; + one_second.tv_usec = 0; + } + /* log_fn(LOG_NOTICE, "Tick."); */ tor_gettimeofday(&now); - if (now.tv_sec > current_second) { - /* the second has rolled over. check more stuff. */ - size_t bytes_written; - size_t bytes_read; - int seconds_elapsed; - bytes_written = stats_prev_global_write_bucket - global_write_bucket; - bytes_read = stats_prev_global_read_bucket - global_read_bucket; - seconds_elapsed = current_second ? (now.tv_sec - current_second) : 0; - stats_n_bytes_read += bytes_read; - stats_n_bytes_written += bytes_written; - if (accounting_is_enabled(get_options())) - accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed); - control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written); - - connection_bucket_refill(&now); - stats_prev_global_read_bucket = global_read_bucket; - stats_prev_global_write_bucket = global_write_bucket; - - /* if more than 10s have elapsed, probably the clock changed: doesn't count. */ - if (seconds_elapsed < 10) - stats_n_seconds_working += seconds_elapsed; - - assert_all_pending_dns_resolves_ok(); - run_scheduled_events(now.tv_sec); - assert_all_pending_dns_resolves_ok(); - - current_second = now.tv_sec; /* remember which second it is, for next time */ - } + /* the second has rolled over. check more stuff. */ + bytes_written = stats_prev_global_write_bucket - global_write_bucket; + bytes_read = stats_prev_global_read_bucket - global_read_bucket; + seconds_elapsed = current_second ? (now.tv_sec - current_second) : 0; + stats_n_bytes_read += bytes_read; + stats_n_bytes_written += bytes_written; + if (accounting_is_enabled(get_options())) + accounting_add_bytes(bytes_read, bytes_written, seconds_elapsed); + control_event_bandwidth_used((uint32_t)bytes_read,(uint32_t)bytes_written); + + connection_bucket_refill(&now); + stats_prev_global_read_bucket = global_read_bucket; + stats_prev_global_write_bucket = global_write_bucket; + + /* if more than 10s have elapsed, probably the clock changed: doesn't count. */ + if (seconds_elapsed < 10) + stats_n_seconds_working += seconds_elapsed; + + assert_all_pending_dns_resolves_ok(); + run_scheduled_events(now.tv_sec); + assert_all_pending_dns_resolves_ok(); + + current_second = now.tv_sec; /* remember which second it is, for next time */ +#if 0 for (i=0;i<nfds;i++) { conn = connection_array[i]; if (connection_has_pending_tls_data(conn) && connection_is_reading(conn)) { log_fn(LOG_DEBUG,"sock %d has pending bytes.",conn->s); - return 0; /* has pending bytes to read; don't let poll wait. */ + return; /* has pending bytes to read; don't let poll wait. */ } } +#endif - return (1000 - (now.tv_usec / 1000)); /* how many milliseconds til the next second? */ + evtimer_add(timeout_event, &one_second); } /** Called when we get a SIGHUP: reload configuration files and keys, @@ -834,9 +947,7 @@ static int do_hup(void) { /** Tor main loop. */ static int do_main_loop(void) { - int i; - int timeout; - int poll_result; + int loop_result; /* load the private keys, if we're supposed to have them, and set up the * TLS context. */ @@ -867,6 +978,9 @@ static int do_main_loop(void) { cpu_init(); } + /* set up once-a-second callback. */ + second_elapsed_callback(0,0,NULL); + for (;;) { #ifdef MS_WINDOWS_SERVICE /* Do service stuff only on windows. */ if (service_status.dwCurrentState == SERVICE_STOP_PENDING) { @@ -876,57 +990,12 @@ static int do_main_loop(void) { return 0; } #endif -#ifndef MS_WINDOWS /* do signal stuff only on unix */ - if (please_die) { - log(LOG_ERR,"Catching signal TERM, exiting cleanly."); - tor_cleanup(); - exit(0); - } - if (please_shutdown) { - if (!server_mode(get_options())) { /* do it now */ - log(LOG_NOTICE,"Interrupt: exiting cleanly."); - tor_cleanup(); - exit(0); - } - hibernate_begin_shutdown(); - please_shutdown = 0; - } - if (please_sigpipe) { - log(LOG_NOTICE,"Caught sigpipe. Ignoring."); - please_sigpipe = 0; - } - if (please_dumpstats) { - /* prefer to log it at INFO, but make sure we always see it */ - dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO); - please_dumpstats = 0; - } - if (please_debug) { - switch_logs_debug(); - log(LOG_NOTICE,"Caught USR2. Going to loglevel debug."); - please_debug = 0; - } - if (please_reset) { - if (do_hup() < 0) { - log_fn(LOG_WARN,"Restart failed (config error?). Exiting."); - tor_cleanup(); - exit(1); - } - please_reset = 0; - } - if (please_reap_children) { - while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more zombies */ - please_reap_children = 0; - } -#endif /* signal stuff */ - - timeout = prepare_for_poll(); - /* poll until we have an event, or the second ends */ - poll_result = tor_poll(poll_array, nfds, timeout); + loop_result = event_dispatch(); /* let catch() handle things like ^c, and otherwise don't worry about it */ - if (poll_result < 0) { - int e = tor_socket_errno(-1); + if (loop_result < 0) { + int e = errno; /* let the program survive things like ^z */ if (e != EINTR) { log_fn(LOG_ERR,"poll failed: %s [%d]", @@ -940,20 +1009,9 @@ static int do_main_loop(void) { } } - /* do all the reads and errors first, so we can detect closed sockets */ - for (i=0;i<nfds;i++) - conn_read(i); /* this also marks broken connections */ - - /* then do the writes */ - for (i=0;i<nfds;i++) - conn_write(i); - - /* any of the conns need to be closed now? */ - for (i=0;i<nfds;i++) - conn_close_if_marked(i); - /* refilling buckets and sending cells happens at the beginning of the * next iteration of the loop, inside prepare_for_poll() + * XXXX No longer so. */ } } @@ -973,19 +1031,19 @@ control_signal_act(int the_signal) switch(the_signal) { case 1: - please_reset = 1; + signal_callback(0,0,(void*)SIGHUP); break; case 2: - please_shutdown = 1; + signal_callback(0,0,(void*)SIGINT); break; case 10: - please_dumpstats = 1; + signal_callback(0,0,(void*)SIGUSR1); break; case 12: - please_debug = 1; + signal_callback(0,0,(void*)SIGUSR2); break; case 15: - please_die = 1; + signal_callback(0,0,(void*)SIGTERM); break; default: return -1; @@ -993,45 +1051,50 @@ control_signal_act(int the_signal) return 0; } -/** Unix signal handler. */ -static void catch(int the_signal) { - -#ifndef MS_WINDOWS /* do signal stuff only on unix */ - switch (the_signal) { -// case SIGABRT: +static void signal_callback(int fd, short events, void *arg) +{ + int sig = (int) arg; + switch (sig) + { case SIGTERM: - please_die = 1; + log(LOG_ERR,"Catching signal TERM, exiting cleanly."); + tor_cleanup(); + exit(0); break; case SIGINT: - please_shutdown = 1; + if (!server_mode(get_options())) { /* do it now */ + log(LOG_NOTICE,"Interrupt: exiting cleanly."); + tor_cleanup(); + exit(0); + } + hibernate_begin_shutdown(); break; +#ifdef SIGPIPE case SIGPIPE: - /* don't log here, since it's possible you got the sigpipe because - * your log failed! */ - please_sigpipe = 1; - break; - case SIGHUP: - please_reset = 1; + log(LOG_NOTICE,"Caught sigpipe. Ignoring."); break; +#endif case SIGUSR1: - please_dumpstats = 1; + /* prefer to log it at INFO, but make sure we always see it */ + dumpstats(get_min_log_level()<LOG_INFO ? get_min_log_level() : LOG_INFO); break; case SIGUSR2: - please_debug = 1; + switch_logs_debug(); + log(LOG_NOTICE,"Caught USR2. Going to loglevel debug."); + break; + case SIGHUP: + if (do_hup() < 0) { + log_fn(LOG_WARN,"Restart failed (config error?). Exiting."); + tor_cleanup(); + exit(1); + } break; +#ifdef SIGCHLD case SIGCHLD: - please_reap_children = 1; + while (waitpid(-1,NULL,WNOHANG) > 0) ; /* keep reaping until no more zombies */ break; -#ifdef SIGXFSZ - case SIGXFSZ: /* this happens when write fails with etoobig */ - break; /* ignore; write will fail and we'll look at errno. */ + } #endif - default: - log(LOG_WARN,"Caught signal %d that we can't handle??", the_signal); - tor_cleanup(); - exit(1); - } -#endif /* signal stuff */ } /** Write all statistics to the log, with log level 'severity'. Called @@ -1120,30 +1183,49 @@ static void exit_function(void) void handle_signals(int is_parent) { #ifndef MS_WINDOWS /* do signal stuff only on unix */ - struct sigaction action; - action.sa_flags = 0; - sigemptyset(&action.sa_mask); - - action.sa_handler = is_parent ? catch : SIG_IGN; - sigaction(SIGINT, &action, NULL); /* do a controlled slow shutdown */ - sigaction(SIGTERM, &action, NULL); /* to terminate now */ - sigaction(SIGPIPE, &action, NULL); /* otherwise sigpipe kills us */ - sigaction(SIGUSR1, &action, NULL); /* dump stats */ - sigaction(SIGUSR2, &action, NULL); /* go to loglevel debug */ - sigaction(SIGHUP, &action, NULL); /* to reload config, retry conns, etc */ + int i; + static int signals[] = { + SIGINT, + SIGTERM, + SIGPIPE, + SIGUSR1, + SIGUSR2, + SIGHUP, #ifdef SIGXFSZ - sigaction(SIGXFSZ, &action, NULL); /* handle file-too-big resource exhaustion */ + SIGXFSZ, +#endif + SIGCHLD, + -1 }; + static struct event signal_events[16]; /* bigger than it has to be. */ + if (is_parent) { + for (i = 0; signals[i] >= 0; ++i) { + signal_set(&signal_events[i], signals[i], signal_callback, + (void*)signals[i]); + signal_add(&signal_events[i], NULL); + } + } else { + struct sigaction action; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + action.sa_handler = SIG_IGN; + sigaction(SIGINT, &action, NULL); /* do a controlled slow shutdown */ + sigaction(SIGTERM, &action, NULL); /* to terminate now */ + sigaction(SIGPIPE, &action, NULL); /* otherwise sigpipe kills us */ + sigaction(SIGUSR1, &action, NULL); /* dump stats */ + sigaction(SIGUSR2, &action, NULL); /* go to loglevel debug */ + sigaction(SIGHUP, &action, NULL); /* to reload config, retry conns, etc */ +#ifdef SIGXFSZ + sigaction(SIGXFSZ, &action, NULL); /* handle file-too-big resource exhaustion */ #endif - if (is_parent) - sigaction(SIGCHLD, &action, NULL); /* handle dns/cpu workers that exit */ #endif /* signal stuff */ + } } /** Main entry point for the Tor command-line client. */ static int tor_init(int argc, char *argv[]) { - time_of_process_start = time(NULL); + closeable_connection_lst = smartlist_create(); /* Initialize the history structures. */ rep_hist_init(); /* Initialize the service cache. */ @@ -1159,6 +1241,8 @@ static int tor_init(int argc, char *argv[]) { return -1; } atexit(exit_function); + event_init(); /* This needs to happen before net stuff. Is it okay if this + * happens before daemonizing? */ if (init_from_config(argc,argv) < 0) { log_fn(LOG_ERR,"Reading config failed--see warnings above. For usage, try -h."); @@ -1195,7 +1279,7 @@ void tor_cleanup(void) { accounting_record_bandwidth_usage(time(NULL)); } -/** Read/create keys as needed, and echo our fingerprint to stdout. */ +/** Read/craete keys as needed, and echo our fingerprint to stdout. */ static void do_list_fingerprint(void) { char buf[FINGERPRINT_LEN+1]; diff --git a/src/or/or.h b/src/or/or.h index bde4c2cf41..319e4f2968 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -40,7 +40,6 @@ #include <ctype.h> #endif #include "../common/torint.h" -#include "../common/fakepoll.h" #ifdef HAVE_INTTYPES_H #include <inttypes.h> #endif @@ -97,6 +96,11 @@ #ifdef HAVE_TIME_H #include <time.h> #endif +#ifdef HAVE_EVENT_H +#include <event.h> +#else +#error "Tor requires libevent to build." +#endif #ifdef MS_WINDOWS #if (_MSC_VER <= 1300) @@ -494,7 +498,9 @@ struct connection_t { * the bandwidth throttler allows reads? */ int s; /**< Our socket; -1 if this connection is closed. */ - int poll_index; /**< Index of this conn into the poll_array. */ + int poll_index; /* XXXX rename. */ + struct event *read_event; /**< libevent event structure. */ + struct event *write_event; /**< libevent event structure. */ int marked_for_close; /**< Boolean: should we close this conn on the next * iteration of the main loop? */ @@ -1380,6 +1386,7 @@ void consider_hibernation(time_t now); int connection_add(connection_t *conn); int connection_remove(connection_t *conn); int connection_in_array(connection_t *conn); +void add_connection_to_closeable_list(connection_t *conn); void get_connection_array(connection_t ***array, int *n); |