diff options
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | configure.in | 7 | ||||
-rw-r--r-- | doc/TODO | 6 | ||||
-rw-r--r-- | src/common/util.c | 4 | ||||
-rw-r--r-- | src/or/config.c | 3 | ||||
-rw-r--r-- | src/or/connection_or.c | 2 | ||||
-rw-r--r-- | src/or/directory.c | 20 | ||||
-rw-r--r-- | src/or/geoip.c | 69 | ||||
-rw-r--r-- | src/or/main.c | 7 | ||||
-rw-r--r-- | src/or/or.h | 22 | ||||
-rw-r--r-- | src/or/router.c | 2 | ||||
-rw-r--r-- | src/or/test.c | 16 |
12 files changed, 137 insertions, 24 deletions
@@ -104,6 +104,9 @@ Changes in version 0.2.1.1-alpha - 2008-??-?? Robert Hogan. Fixes the first part of bug 681. - Make bridge authorities never serve extrainfo docs. - Allow comments in geoip file. + - New configure/torrc options (--enable-geoip-stats, + DirRecordUsageByCountry) to record how many IPs we've served directory + info to in each country code. o Minor features (security): - Reject requests for reverse-dns lookup of names in a private diff --git a/configure.in b/configure.in index 0ae7cb9654..9752553406 100644 --- a/configure.in +++ b/configure.in @@ -87,6 +87,13 @@ case $host in ;; esac +AC_ARG_ENABLE(geoip-stats, + AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics)) + +if test "$enable_geoip_stats" = "yes"; then + AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics]) +fi + AC_ARG_ENABLE(gcc-warnings, AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings)) @@ -289,10 +289,10 @@ Bugs/issues for Tor 0.2.0.x: too much. o teach geoip_parse_entry() to skip over lines that start with #, so we can put a little note at the top of the geoip file to say what it is. -N d we should have an off-by-default way for relays to dump geoip data to + . we should have an off-by-default way for relays to dump geoip data to a file in their data directory, for measurement purposes. - - Basic implementation - - Include probability-of-selection + o Basic implementation +N - Include probability-of-selection R d let bridges set relaybandwidthrate as low as 5kb R - bug: if we launch using bridges, and then stop using bridges, we still have our bridges in our entryguards section, and may use them. 
diff --git a/src/common/util.c b/src/common/util.c index 14bd323634..a048779872 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -1559,7 +1559,6 @@ start_writing_to_file(const char *fname, int open_flags, int mode, tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0); #endif new_file->fd = -1; - tempname_len = strlen(fname)+16; tor_assert(tempname_len > strlen(fname)); /*check for overflow*/ new_file->filename = tor_strdup(fname); if (open_flags & O_APPEND) { @@ -1577,8 +1576,7 @@ start_writing_to_file(const char *fname, int open_flags, int mode, new_file->rename_on_close = 1; } - if ((new_file->fd = open(open_name, open_flags, mode)) - < 0) { + if ((new_file->fd = open(open_name, open_flags, mode)) < 0) { log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s", open_name, fname, strerror(errno)); goto err; diff --git a/src/or/config.c b/src/or/config.c index 8b45e8ca2e..e55c13381f 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -179,6 +179,9 @@ static config_var_t _option_vars[] = { V(DirPolicy, LINELIST, NULL), V(DirPort, UINT, "0"), OBSOLETE("DirPostPeriod"), +#ifdef ENABLE_GEOIP_STATS + V(DirRecordUsageByCountry, BOOL, "0"), +#endif VAR("DirServer", LINELIST, DirServers, NULL), V(DNSPort, UINT, "0"), V(DNSListenAddress, LINELIST, NULL), diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 6c44840862..cdc71bbd89 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -901,7 +901,7 @@ connection_or_set_state_open(or_connection_t *conn) } else { /* only report it to the geoip module if it's not a known router */ if (!router_get_by_digest(conn->identity_digest)) - geoip_note_client_seen(TO_CONN(conn)->addr, now); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now); } if (conn->handshake_state) { or_handshake_state_free(conn->handshake_state); diff --git a/src/or/directory.c b/src/or/directory.c index e753df85eb..bc3cac13ee 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -2484,6 +2484,26 
@@ directory_handle_command_get(dir_connection_t *conn, const char *headers, goto done; } +#ifdef ENABLE_GEOIP_STATS + { + geoip_client_action_t act = + is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2; + uint32_t addr = conn->_base.addr; + + if (conn->_base.linked_conn) { + connection_t *c = conn->_base.linked_conn; + if (c->type == CONN_TYPE_EXIT) { + circuit_t *circ = TO_EDGE_CONN(c)->on_circuit; + if (! CIRCUIT_IS_ORIGIN(circ)) { + or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn; + addr = orconn->_base.addr; + } + } + } + geoip_note_client_seen(act, addr, time(NULL)); + } +#endif + // note_request(request_type,dlen); (void) request_type; write_http_response_header(conn, -1, compressed, diff --git a/src/or/geoip.c b/src/or/geoip.c index 9cd56b1d35..35d882499f 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -131,7 +131,7 @@ _geoip_compare_key_to_entry(const void *_key, const void **_member) * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned * integers, and CC is a country code. - * + * * It also recognizes, and skips over, blank lines and lines that start * with '#' (comments). */ @@ -208,9 +208,12 @@ geoip_is_loaded(void) typedef struct clientmap_entry_t { HT_ENTRY(clientmap_entry_t) node; uint32_t ipaddr; - time_t last_seen; + time_t last_seen; /* The last 2 bits of this value hold the client + * operation. */ } clientmap_entry_t; +#define ACTION_MASK 3 + /** Map from client IP address to last time seen. */ static HT_HEAD(clientmap, clientmap_entry_t) client_history = HT_INITIALIZER(); @@ -238,12 +241,28 @@ HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash, /** Note that we've seen a client connect from the IP <b>addr</b> (host order) * at time <b>now</b>. Ignored by all but bridges. 
*/ void -geoip_note_client_seen(uint32_t addr, time_t now) +geoip_note_client_seen(geoip_client_action_t action, + uint32_t addr, time_t now) { or_options_t *options = get_options(); clientmap_entry_t lookup, *ent; - if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)) + if (action == GEOIP_CLIENT_CONNECT) { + if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)) + return; + } else { +#ifndef ENABLE_GEOIP_STATS return; +#else + if (options->BridgeRelay || options->BridgeAuthoritativeDir || + !options->DirRecordUsageByCountry) + return; +#endif + } + + /* We use the low 2 bits of the time to encode the action. Since we're + * potentially remembering times of clients, we don't want to make + * clientmap_entry_t larger than it has to be. */ + now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK); lookup.ipaddr = addr; ent = HT_FIND(clientmap, &client_history, &lookup); if (ent) { @@ -328,7 +347,7 @@ _c_hist_compare(const void **_a, const void **_b) * that country, and cc is a lowercased country code. Returns NULL if we don't * want to export geoip data yet. 
*/ char * -geoip_get_client_history(time_t now) +geoip_get_client_history(time_t now, geoip_client_action_t action) { char *result = NULL; if (!geoip_is_loaded()) @@ -343,7 +362,10 @@ geoip_get_client_history(time_t now) unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries); unsigned total = 0; HT_FOREACH(ent, clientmap, &client_history) { - int country = geoip_get_country_by_ip((*ent)->ipaddr); + int country; + if (((*ent)->last_seen & ACTION_MASK) != action) + continue; + country = geoip_get_country_by_ip((*ent)->ipaddr); if (country < 0) continue; tor_assert(0 <= country && country < n_countries); @@ -404,6 +426,41 @@ geoip_get_client_history(time_t now) return result; } +void +dump_geoip_stats(void) +{ +#ifdef ENABLE_GEOIP_STATS + time_t now = time(NULL); + char *filename = get_datadir_fname("geoip-stats"); + char *data_v2 = NULL, *data_v3 = NULL; + char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1]; + open_file_t *open_file = NULL; + FILE *out; + + data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2); + data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS); + format_iso_time(since, geoip_get_history_start()); + format_iso_time(written, now); + if (!data_v2 || !data_v3) + goto done; + out = start_writing_to_stdio_file(filename, 0, 0600, &open_file); + if (!out) + goto done; + if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n", + written, since, data_v3, data_v2) < 0) + goto done; + + finish_writing_to_file(open_file); + open_file = NULL; + done: + if (open_file) + abort_writing_to_file(open_file); + tor_free(filename); + tor_free(data_v2); + tor_free(data_v3); +#endif +} + /** Helper used to implement GETINFO ip-to-country/... controller command. 
*/ int getinfo_helper_geoip(control_connection_t *control_conn, diff --git a/src/or/main.c b/src/or/main.c index 2dc4b0697a..6993103a2d 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -832,6 +832,7 @@ run_scheduled_events(time_t now) static time_t time_to_clean_caches = 0; static time_t time_to_recheck_bandwidth = 0; static time_t time_to_check_for_expired_networkstatus = 0; + static time_t time_to_dump_geoip_stats = 0; or_options_t *options = get_options(); int i; int have_dir_info; @@ -958,6 +959,12 @@ run_scheduled_events(time_t now) time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL; } + if (time_to_dump_geoip_stats < now) { +#define DUMP_GEOIP_STATS_INTERVAL (60*60) + time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL; + dump_geoip_stats(); + } + /** 2. Periodically, we consider getting a new directory, getting a * new running-routers list, and/or force-uploading our descriptor * (if we've passed our internal checks). */ diff --git a/src/or/or.h b/src/or/or.h index 2ebe7511db..024f278a51 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -2358,6 +2358,10 @@ typedef struct { * count of how many client addresses have contacted us so that we can help * the bridge authority guess which countries have blocked access to us. */ int BridgeRecordUsageByCountry; +#ifdef ENABLE_GEOIP_STATS + int DirRecordUsageByCountry; +#endif + /** Optionally, a file with GeoIP data. */ char *GeoIPFile; @@ -3294,13 +3298,27 @@ int geoip_get_country_by_ip(uint32_t ipaddr); int geoip_get_n_countries(void); const char *geoip_get_country_name(int num); int geoip_is_loaded(void); -void geoip_note_client_seen(uint32_t addr, time_t now); +/** Indicates an action that we might be noting geoip statistics on. + * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing + * the others, we're not. + */ +typedef enum { + /** We've noticed a connection as a bridge relay. 
*/ + GEOIP_CLIENT_CONNECT = 0, + /** We've served a networkstatus consensus as a directory server. */ + GEOIP_CLIENT_NETWORKSTATUS = 1, + /** We've served a v2 networkstatus consensus as a directory server. */ + GEOIP_CLIENT_NETWORKSTATUS_V2 = 2, +} geoip_client_action_t; +void geoip_note_client_seen(geoip_client_action_t action, + uint32_t addr, time_t now); void geoip_remove_old_clients(time_t cutoff); time_t geoip_get_history_start(void); -char *geoip_get_client_history(time_t now); +char *geoip_get_client_history(time_t now, geoip_client_action_t action); int getinfo_helper_geoip(control_connection_t *control_conn, const char *question, char **answer); void geoip_free_all(void); +void dump_geoip_stats(void); /********************************* hibernate.c **********************/ diff --git a/src/or/router.c b/src/or/router.c index 8f2bf65482..972bd8525b 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1830,7 +1830,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, geoip_remove_old_clients(now-48*60*60); last_purged_at = now; } - geoip_summary = geoip_get_client_history(time(NULL)); + geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT); if (geoip_summary) { char geoip_start[ISO_TIME_LEN+1]; format_iso_time(geoip_start, geoip_get_history_start()); diff --git a/src/or/test.c b/src/or/test.c index 391efec4ea..6dcb521eeb 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -3908,28 +3908,28 @@ test_geoip(void) get_options()->BridgeRecordUsageByCountry = 1; /* Put 9 observations in AB... */ for (i=32; i < 40; ++i) - geoip_note_client_seen(i, now); - geoip_note_client_seen(225, now); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now); /* and 3 observations in XY, several times. */ for (j=0; j < 10; ++j) for (i=52; i < 55; ++i) - geoip_note_client_seen(i, now-3600); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600); /* and 17 observations in ZZ... 
*/ for (i=110; i < 127; ++i) - geoip_note_client_seen(i, now-7200); - s = geoip_get_client_history(now+5*24*60*60); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200); + s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT); test_assert(s); test_streq("zz=24,ab=16", s); tor_free(s); /* Now clear out all the zz observations. */ geoip_remove_old_clients(now-6000); - s = geoip_get_client_history(now+5*24*60*60); + s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT); test_assert(! s); /* There are only 12 observations left. Not enough to build an answer. Add 4 more in XY... */ for (i=55; i < 59; ++i) - geoip_note_client_seen(i, now-3600); - s = geoip_get_client_history(now+5*24*60*60); + geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600); + s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT); test_assert(s); test_streq("ab=16", s); tor_free(s); |