diff options
author | Nick Mathewson <nickm@torproject.org> | 2018-07-05 16:31:38 -0400 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2018-07-05 17:15:50 -0400 |
commit | 63b4ea22af8e8314dd718f02046de5f4b91edf9d (patch) | |
tree | af52b6fba37f22c86447fd5267dd5eb557807c8b /src/feature/stats | |
parent | ce84200542f48a92e8b56a8d032401ecd153e90c (diff) | |
download | tor-63b4ea22af8e8314dd718f02046de5f4b91edf9d.tar.gz tor-63b4ea22af8e8314dd718f02046de5f4b91edf9d.zip |
Move literally everything out of src/or
This commit won't build yet -- it just puts everything in a slightly
more logical place.
The reasoning here is that "src/core" will hold the stuff that every (or
nearly every) tor instance will need in order to do onion routing.
Other features (including some necessary ones) will live in
"src/feature". The "src/app" directory will hold the stuff needed
to have Tor be an application you can actually run.
This commit DOES NOT refactor the former contents of src/or into a
logical set of acyclic libraries, or change any code at all. That
will have to come in the future.
We will continue to move things around and split them in the future,
but I hope this lays a reasonable groundwork for doing so.
Diffstat (limited to 'src/feature/stats')
-rw-r--r-- | src/feature/stats/geoip.c | 1889 | ||||
-rw-r--r-- | src/feature/stats/geoip.h | 157 | ||||
-rw-r--r-- | src/feature/stats/rephist.c | 3220 | ||||
-rw-r--r-- | src/feature/stats/rephist.h | 140 |
4 files changed, 5406 insertions, 0 deletions
diff --git a/src/feature/stats/geoip.c b/src/feature/stats/geoip.c new file mode 100644 index 0000000000..3e6f20ea3f --- /dev/null +++ b/src/feature/stats/geoip.c @@ -0,0 +1,1889 @@ +/* Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file geoip.c + * \brief Functions related to maintaining an IP-to-country database; + * to summarizing client connections by country to entry guards, bridges, + * and directory servers; and for statistics on answering network status + * requests. + * + * There are two main kinds of functions in this module: geoip functions, + * which map groups of IPv4 and IPv6 addresses to country codes, and + * statistical functions, which collect statistics about different kinds of + * per-country usage. + * + * The geoip lookup tables are implemented as sorted lists of disjoint address + * ranges, each mapping to a singleton geoip_country_t. These country objects + * are also indexed by their names in a hashtable. + * + * The tables are populated from disk at startup by the geoip_load_file() + * function. For more information on the file format they read, see that + * function. See the scripts and the README file in src/config for more + * information about how those files are generated. + * + * Tor uses GeoIP information in order to implement user requests (such as + * ExcludeNodes {cc}), and to keep track of how much usage relays are getting + * for each country. + */ + +#define GEOIP_PRIVATE +#include "or/or.h" +#include "ht.h" +#include "lib/container/buffers.h" +#include "or/config.h" +#include "or/control.h" +#include "or/dnsserv.h" +#include "or/dos.h" +#include "or/geoip.h" +#include "or/routerlist.h" + +#include "lib/container/order.h" +#include "lib/time/tvdiff.h" + +static void init_geoip_countries(void); + +/** An entry from the GeoIP IPv4 file: maps an IPv4 range to a country. */ +typedef struct geoip_ipv4_entry_t { + uint32_t ip_low; /**< The lowest IP in the range, in host order */ + uint32_t ip_high; /**< The highest IP in the range, in host order */ + intptr_t country; /**< An index into geoip_countries */ +} geoip_ipv4_entry_t; + +/** An entry from the GeoIP IPv6 file: maps an IPv6 range to a country. */ +typedef struct geoip_ipv6_entry_t { + struct in6_addr ip_low; /**< The lowest IP in the range, in host order */ + struct in6_addr ip_high; /**< The highest IP in the range, in host order */ + intptr_t country; /**< An index into geoip_countries */ +} geoip_ipv6_entry_t; + +/** A per-country record for GeoIP request history. */ +typedef struct geoip_country_t { + char countrycode[3]; + uint32_t n_v3_ns_requests; +} geoip_country_t; + +/** A list of geoip_country_t */ +static smartlist_t *geoip_countries = NULL; +/** A map from lowercased country codes to their position in geoip_countries. + * The index is encoded in the pointer, and 1 is added so that NULL can mean + * not found. */ +static strmap_t *country_idxplus1_by_lc_code = NULL; +/** Lists of all known geoip_ipv4_entry_t and geoip_ipv6_entry_t, sorted + * by their respective ip_low. */ +static smartlist_t *geoip_ipv4_entries = NULL, *geoip_ipv6_entries = NULL; + +/** SHA1 digest of the GeoIP files to include in extra-info descriptors. */ +static char geoip_digest[DIGEST_LEN]; +static char geoip6_digest[DIGEST_LEN]; + +/* Total size in bytes of the geoip client history cache. Used by the OOM + * handler. */ +static size_t geoip_client_history_cache_size; + +/* Increment the geoip client history cache size counter with the given bytes. + * This prevents an overflow and set it to its maximum in that case. */ +static inline void +geoip_increment_client_history_cache_size(size_t bytes) +{ + /* This is shockingly high, lets log it so it can be reported. */ + IF_BUG_ONCE(geoip_client_history_cache_size > (SIZE_MAX - bytes)) { + geoip_client_history_cache_size = SIZE_MAX; + return; + } + geoip_client_history_cache_size += bytes; +} + +/* Decrement the geoip client history cache size counter with the given bytes. + * This prevents an underflow and set it to 0 in that case. */ +static inline void +geoip_decrement_client_history_cache_size(size_t bytes) +{ + /* Going below 0 means that we either allocated an entry without + * incrementing the counter or we have different sizes when allocating and + * freeing. It shouldn't happened so log it. */ + IF_BUG_ONCE(geoip_client_history_cache_size < bytes) { + geoip_client_history_cache_size = 0; + return; + } + geoip_client_history_cache_size -= bytes; +} + +/** Return the index of the <b>country</b>'s entry in the GeoIP + * country list if it is a valid 2-letter country code, otherwise + * return -1. */ +MOCK_IMPL(country_t, +geoip_get_country,(const char *country)) +{ + void *idxplus1_; + intptr_t idx; + + idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country); + if (!idxplus1_) + return -1; + + idx = ((uintptr_t)idxplus1_)-1; + return (country_t)idx; +} + +/** Add an entry to a GeoIP table, mapping all IP addresses between <b>low</b> + * and <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. */ +static void +geoip_add_entry(const tor_addr_t *low, const tor_addr_t *high, + const char *country) +{ + intptr_t idx; + void *idxplus1_; + + IF_BUG_ONCE(tor_addr_family(low) != tor_addr_family(high)) + return; + IF_BUG_ONCE(tor_addr_compare(high, low, CMP_EXACT) < 0) + return; + + idxplus1_ = strmap_get_lc(country_idxplus1_by_lc_code, country); + + if (!idxplus1_) { + geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t)); + strlcpy(c->countrycode, country, sizeof(c->countrycode)); + tor_strlower(c->countrycode); + smartlist_add(geoip_countries, c); + idx = smartlist_len(geoip_countries) - 1; + strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1)); + } else { + idx = ((uintptr_t)idxplus1_)-1; + } + { + geoip_country_t *c = smartlist_get(geoip_countries, (int)idx); + tor_assert(!strcasecmp(c->countrycode, country)); + } + + if (tor_addr_family(low) == AF_INET) { + geoip_ipv4_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv4_entry_t)); + ent->ip_low = tor_addr_to_ipv4h(low); + ent->ip_high = tor_addr_to_ipv4h(high); + ent->country = idx; + smartlist_add(geoip_ipv4_entries, ent); + } else if (tor_addr_family(low) == AF_INET6) { + geoip_ipv6_entry_t *ent = tor_malloc_zero(sizeof(geoip_ipv6_entry_t)); + ent->ip_low = *tor_addr_to_in6_assert(low); + ent->ip_high = *tor_addr_to_in6_assert(high); + ent->country = idx; + smartlist_add(geoip_ipv6_entries, ent); + } +} + +/** Add an entry to the GeoIP table indicated by <b>family</b>, + * parsing it from <b>line</b>. The format is as for geoip_load_file(). */ +STATIC int +geoip_parse_entry(const char *line, sa_family_t family) +{ + tor_addr_t low_addr, high_addr; + char c[3]; + char *country = NULL; + + if (!geoip_countries) + init_geoip_countries(); + if (family == AF_INET) { + if (!geoip_ipv4_entries) + geoip_ipv4_entries = smartlist_new(); + } else if (family == AF_INET6) { + if (!geoip_ipv6_entries) + geoip_ipv6_entries = smartlist_new(); + } else { + log_warn(LD_GENERAL, "Unsupported family: %d", family); + return -1; + } + + while (TOR_ISSPACE(*line)) + ++line; + if (*line == '#') + return 0; + + char buf[512]; + if (family == AF_INET) { + unsigned int low, high; + if (tor_sscanf(line,"%u,%u,%2s", &low, &high, c) == 3 || + tor_sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, c) == 3) { + tor_addr_from_ipv4h(&low_addr, low); + tor_addr_from_ipv4h(&high_addr, high); + } else + goto fail; + country = c; + } else { /* AF_INET6 */ + char *low_str, *high_str; + struct in6_addr low, high; + char *strtok_state; + strlcpy(buf, line, sizeof(buf)); + low_str = tor_strtok_r(buf, ",", &strtok_state); + if (!low_str) + goto fail; + high_str = tor_strtok_r(NULL, ",", &strtok_state); + if (!high_str) + goto fail; + country = tor_strtok_r(NULL, "\n", &strtok_state); + if (!country) + goto fail; + if (strlen(country) != 2) + goto fail; + if (tor_inet_pton(AF_INET6, low_str, &low) <= 0) + goto fail; + tor_addr_from_in6(&low_addr, &low); + if (tor_inet_pton(AF_INET6, high_str, &high) <= 0) + goto fail; + tor_addr_from_in6(&high_addr, &high); + } + geoip_add_entry(&low_addr, &high_addr, country); + return 0; + + fail: + log_warn(LD_GENERAL, "Unable to parse line from GEOIP %s file: %s", + family == AF_INET ? "IPv4" : "IPv6", escaped(line)); + return -1; +} + +/** Sorting helper: return -1, 1, or 0 based on comparison of two + * geoip_ipv4_entry_t */ +static int +geoip_ipv4_compare_entries_(const void **_a, const void **_b) +{ + const geoip_ipv4_entry_t *a = *_a, *b = *_b; + if (a->ip_low < b->ip_low) + return -1; + else if (a->ip_low > b->ip_low) + return 1; + else + return 0; +} + +/** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer + * to a uint32_t in host order) to a geoip_ipv4_entry_t */ +static int +geoip_ipv4_compare_key_to_entry_(const void *_key, const void **_member) +{ + /* No alignment issue here, since _key really is a pointer to uint32_t */ + const uint32_t addr = *(uint32_t *)_key; + const geoip_ipv4_entry_t *entry = *_member; + if (addr < entry->ip_low) + return -1; + else if (addr > entry->ip_high) + return 1; + else + return 0; +} + +/** Sorting helper: return -1, 1, or 0 based on comparison of two + * geoip_ipv6_entry_t */ +static int +geoip_ipv6_compare_entries_(const void **_a, const void **_b) +{ + const geoip_ipv6_entry_t *a = *_a, *b = *_b; + return fast_memcmp(a->ip_low.s6_addr, b->ip_low.s6_addr, + sizeof(struct in6_addr)); +} + +/** bsearch helper: return -1, 1, or 0 based on comparison of an IPv6 + * (a pointer to a in6_addr) to a geoip_ipv6_entry_t */ +static int +geoip_ipv6_compare_key_to_entry_(const void *_key, const void **_member) +{ + const struct in6_addr *addr = (struct in6_addr *)_key; + const geoip_ipv6_entry_t *entry = *_member; + + if (fast_memcmp(addr->s6_addr, entry->ip_low.s6_addr, + sizeof(struct in6_addr)) < 0) + return -1; + else if (fast_memcmp(addr->s6_addr, entry->ip_high.s6_addr, + sizeof(struct in6_addr)) > 0) + return 1; + else + return 0; +} + +/** Return 1 if we should collect geoip stats on bridge users, and + * include them in our extrainfo descriptor. Else return 0. */ +int +should_record_bridge_info(const or_options_t *options) +{ + return options->BridgeRelay && options->BridgeRecordUsageByCountry; +} + +/** Set up a new list of geoip countries with no countries (yet) set in it, + * except for the unknown country. + */ +static void +init_geoip_countries(void) +{ + geoip_country_t *geoip_unresolved; + geoip_countries = smartlist_new(); + /* Add a geoip_country_t for requests that could not be resolved to a + * country as first element (index 0) to geoip_countries. */ + geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t)); + strlcpy(geoip_unresolved->countrycode, "??", + sizeof(geoip_unresolved->countrycode)); + smartlist_add(geoip_countries, geoip_unresolved); + country_idxplus1_by_lc_code = strmap_new(); + strmap_set_lc(country_idxplus1_by_lc_code, "??", (void*)(1)); +} + +/** Clear appropriate GeoIP database, based on <b>family</b>, and + * reload it from the file <b>filename</b>. Return 0 on success, -1 on + * failure. + * + * Recognized line formats for IPv4 are: + * INTIPLOW,INTIPHIGH,CC + * and + * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" + * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned + * integers, and CC is a country code. + * + * Recognized line format for IPv6 is: + * IPV6LOW,IPV6HIGH,CC + * where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code. + * + * It also recognizes, and skips over, blank lines and lines that start + * with '#' (comments). + */ +int +geoip_load_file(sa_family_t family, const char *filename) +{ + FILE *f; + const char *msg = ""; + const or_options_t *options = get_options(); + int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO; + crypto_digest_t *geoip_digest_env = NULL; + + tor_assert(family == AF_INET || family == AF_INET6); + + if (!(f = tor_fopen_cloexec(filename, "r"))) { + log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s", + filename, msg); + return -1; + } + if (!geoip_countries) + init_geoip_countries(); + + if (family == AF_INET) { + if (geoip_ipv4_entries) { + SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, e, + tor_free(e)); + smartlist_free(geoip_ipv4_entries); + } + geoip_ipv4_entries = smartlist_new(); + } else { /* AF_INET6 */ + if (geoip_ipv6_entries) { + SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, e, + tor_free(e)); + smartlist_free(geoip_ipv6_entries); + } + geoip_ipv6_entries = smartlist_new(); + } + geoip_digest_env = crypto_digest_new(); + + log_notice(LD_GENERAL, "Parsing GEOIP %s file %s.", + (family == AF_INET) ? "IPv4" : "IPv6", filename); + while (!feof(f)) { + char buf[512]; + if (fgets(buf, (int)sizeof(buf), f) == NULL) + break; + crypto_digest_add_bytes(geoip_digest_env, buf, strlen(buf)); + /* FFFF track full country name. */ + geoip_parse_entry(buf, family); + } + /*XXXX abort and return -1 if no entries/illformed?*/ + fclose(f); + + /* Sort list and remember file digests so that we can include it in + * our extra-info descriptors. */ + if (family == AF_INET) { + smartlist_sort(geoip_ipv4_entries, geoip_ipv4_compare_entries_); + /* Okay, now we need to maybe change our mind about what is in + * which country. We do this for IPv4 only since that's what we + * store in node->country. */ + refresh_all_country_info(); + crypto_digest_get_digest(geoip_digest_env, geoip_digest, DIGEST_LEN); + } else { + /* AF_INET6 */ + smartlist_sort(geoip_ipv6_entries, geoip_ipv6_compare_entries_); + crypto_digest_get_digest(geoip_digest_env, geoip6_digest, DIGEST_LEN); + } + crypto_digest_free(geoip_digest_env); + + return 0; +} + +/** Given an IP address in host order, return a number representing the + * country to which that address belongs, -1 for "No geoip information + * available", or 0 for the 'unknown country'. The return value will always + * be less than geoip_get_n_countries(). To decode it, call + * geoip_get_country_name(). + */ +STATIC int +geoip_get_country_by_ipv4(uint32_t ipaddr) +{ + geoip_ipv4_entry_t *ent; + if (!geoip_ipv4_entries) + return -1; + ent = smartlist_bsearch(geoip_ipv4_entries, &ipaddr, + geoip_ipv4_compare_key_to_entry_); + return ent ? (int)ent->country : 0; +} + +/** Given an IPv6 address, return a number representing the country to + * which that address belongs, -1 for "No geoip information available", or + * 0 for the 'unknown country'. The return value will always be less than + * geoip_get_n_countries(). To decode it, call geoip_get_country_name(). + */ +STATIC int +geoip_get_country_by_ipv6(const struct in6_addr *addr) +{ + geoip_ipv6_entry_t *ent; + + if (!geoip_ipv6_entries) + return -1; + ent = smartlist_bsearch(geoip_ipv6_entries, addr, + geoip_ipv6_compare_key_to_entry_); + return ent ? (int)ent->country : 0; +} + +/** Given an IP address, return a number representing the country to which + * that address belongs, -1 for "No geoip information available", or 0 for + * the 'unknown country'. The return value will always be less than + * geoip_get_n_countries(). To decode it, call geoip_get_country_name(). + */ +MOCK_IMPL(int, +geoip_get_country_by_addr,(const tor_addr_t *addr)) +{ + if (tor_addr_family(addr) == AF_INET) { + return geoip_get_country_by_ipv4(tor_addr_to_ipv4h(addr)); + } else if (tor_addr_family(addr) == AF_INET6) { + return geoip_get_country_by_ipv6(tor_addr_to_in6(addr)); + } else { + return -1; + } +} + +/** Return the number of countries recognized by the GeoIP country list. */ +MOCK_IMPL(int, +geoip_get_n_countries,(void)) +{ + if (!geoip_countries) + init_geoip_countries(); + return (int) smartlist_len(geoip_countries); +} + +/** Return the two-letter country code associated with the number <b>num</b>, + * or "??" for an unknown value. */ +const char * +geoip_get_country_name(country_t num) +{ + if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) { + geoip_country_t *c = smartlist_get(geoip_countries, num); + return c->countrycode; + } else + return "??"; +} + +/** Return true iff we have loaded a GeoIP database.*/ +MOCK_IMPL(int, +geoip_is_loaded,(sa_family_t family)) +{ + tor_assert(family == AF_INET || family == AF_INET6); + if (geoip_countries == NULL) + return 0; + if (family == AF_INET) + return geoip_ipv4_entries != NULL; + else /* AF_INET6 */ + return geoip_ipv6_entries != NULL; +} + +/** Return the hex-encoded SHA1 digest of the loaded GeoIP file. The + * result does not need to be deallocated, but will be overwritten by the + * next call of hex_str(). */ +const char * +geoip_db_digest(sa_family_t family) +{ + tor_assert(family == AF_INET || family == AF_INET6); + if (family == AF_INET) + return hex_str(geoip_digest, DIGEST_LEN); + else /* AF_INET6 */ + return hex_str(geoip6_digest, DIGEST_LEN); +} + +/** Largest allowable value for last_seen_in_minutes. (It's a 30-bit field, + * so it can hold up to (1u<<30)-1, or 0x3fffffffu. + */ +#define MAX_LAST_SEEN_IN_MINUTES 0X3FFFFFFFu + +/** Map from client IP address to last time seen. */ +static HT_HEAD(clientmap, clientmap_entry_t) client_history = + HT_INITIALIZER(); + +/** Hashtable helper: compute a hash of a clientmap_entry_t. */ +static inline unsigned +clientmap_entry_hash(const clientmap_entry_t *a) +{ + unsigned h = (unsigned) tor_addr_hash(&a->addr); + + if (a->transport_name) + h += (unsigned) siphash24g(a->transport_name, strlen(a->transport_name)); + + return h; +} +/** Hashtable helper: compare two clientmap_entry_t values for equality. */ +static inline int +clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b) +{ + if (strcmp_opt(a->transport_name, b->transport_name)) + return 0; + + return !tor_addr_compare(&a->addr, &b->addr, CMP_EXACT) && + a->action == b->action; +} + +HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash, + clientmap_entries_eq) +HT_GENERATE2(clientmap, clientmap_entry_t, node, clientmap_entry_hash, + clientmap_entries_eq, 0.6, tor_reallocarray_, tor_free_) + +#define clientmap_entry_free(ent) \ + FREE_AND_NULL(clientmap_entry_t, clientmap_entry_free_, ent) + +/** Return the size of a client map entry. */ +static inline size_t +clientmap_entry_size(const clientmap_entry_t *ent) +{ + tor_assert(ent); + return (sizeof(clientmap_entry_t) + + (ent->transport_name ? strlen(ent->transport_name) : 0)); +} + +/** Free all storage held by <b>ent</b>. */ +static void +clientmap_entry_free_(clientmap_entry_t *ent) +{ + if (!ent) + return; + + /* This entry is about to be freed so pass it to the DoS subsystem to see if + * any actions can be taken about it. */ + dos_geoip_entry_about_to_free(ent); + geoip_decrement_client_history_cache_size(clientmap_entry_size(ent)); + + tor_free(ent->transport_name); + tor_free(ent); +} + +/* Return a newly allocated clientmap entry with the given action and address + * that are mandatory. The transport_name can be optional. This can't fail. */ +static clientmap_entry_t * +clientmap_entry_new(geoip_client_action_t action, const tor_addr_t *addr, + const char *transport_name) +{ + clientmap_entry_t *entry; + + tor_assert(action == GEOIP_CLIENT_CONNECT || + action == GEOIP_CLIENT_NETWORKSTATUS); + tor_assert(addr); + + entry = tor_malloc_zero(sizeof(clientmap_entry_t)); + entry->action = action; + tor_addr_copy(&entry->addr, addr); + if (transport_name) { + entry->transport_name = tor_strdup(transport_name); + } + + /* Allocated and initialized, note down its size for the OOM handler. */ + geoip_increment_client_history_cache_size(clientmap_entry_size(entry)); + + return entry; +} + +/** Clear history of connecting clients used by entry and bridge stats. */ +static void +client_history_clear(void) +{ + clientmap_entry_t **ent, **next, *this; + for (ent = HT_START(clientmap, &client_history); ent != NULL; + ent = next) { + if ((*ent)->action == GEOIP_CLIENT_CONNECT) { + this = *ent; + next = HT_NEXT_RMV(clientmap, &client_history, ent); + clientmap_entry_free(this); + } else { + next = HT_NEXT(clientmap, &client_history, ent); + } + } +} + +/** Note that we've seen a client connect from the IP <b>addr</b> + * at time <b>now</b>. Ignored by all but bridges and directories if + * configured accordingly. */ +void +geoip_note_client_seen(geoip_client_action_t action, + const tor_addr_t *addr, + const char *transport_name, + time_t now) +{ + const or_options_t *options = get_options(); + clientmap_entry_t *ent; + + if (action == GEOIP_CLIENT_CONNECT) { + /* Only remember statistics if the DoS mitigation subsystem is enabled. If + * not, only if as entry guard or as bridge. */ + if (!dos_enabled()) { + if (!options->EntryStatistics && !should_record_bridge_info(options)) { + return; + } + } + } else { + /* Only gather directory-request statistics if configured, and + * forcibly disable them on bridge authorities. */ + if (!options->DirReqStatistics || options->BridgeAuthoritativeDir) + return; + } + + log_debug(LD_GENERAL, "Seen client from '%s' with transport '%s'.", + safe_str_client(fmt_addr((addr))), + transport_name ? transport_name : "<no transport>"); + + ent = geoip_lookup_client(addr, transport_name, action); + if (! ent) { + ent = clientmap_entry_new(action, addr, transport_name); + HT_INSERT(clientmap, &client_history, ent); + } + if (now / 60 <= (int)MAX_LAST_SEEN_IN_MINUTES && now >= 0) + ent->last_seen_in_minutes = (unsigned)(now/60); + else + ent->last_seen_in_minutes = 0; + + if (action == GEOIP_CLIENT_NETWORKSTATUS) { + int country_idx = geoip_get_country_by_addr(addr); + if (country_idx < 0) + country_idx = 0; /** unresolved requests are stored at index 0. */ + if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) { + geoip_country_t *country = smartlist_get(geoip_countries, country_idx); + ++country->n_v3_ns_requests; + } + } +} + +/** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's + * older than a certain time. */ +static int +remove_old_client_helper_(struct clientmap_entry_t *ent, void *_cutoff) +{ + time_t cutoff = *(time_t*)_cutoff / 60; + if (ent->last_seen_in_minutes < cutoff) { + clientmap_entry_free(ent); + return 1; + } else { + return 0; + } +} + +/** Forget about all clients that haven't connected since <b>cutoff</b>. */ +void +geoip_remove_old_clients(time_t cutoff) +{ + clientmap_HT_FOREACH_FN(&client_history, + remove_old_client_helper_, + &cutoff); +} + +/* Return a client entry object matching the given address, transport name and + * geoip action from the clientmap. NULL if not found. The transport_name can + * be NULL. */ +clientmap_entry_t * +geoip_lookup_client(const tor_addr_t *addr, const char *transport_name, + geoip_client_action_t action) +{ + clientmap_entry_t lookup; + + tor_assert(addr); + + /* We always look for a client connection with no transport. */ + tor_addr_copy(&lookup.addr, addr); + lookup.action = action; + lookup.transport_name = (char *) transport_name; + + return HT_FIND(clientmap, &client_history, &lookup); +} + +/* Cleanup client entries older than the cutoff. Used for the OOM. Return the + * number of bytes freed. If 0 is returned, nothing was freed. */ +static size_t +oom_clean_client_entries(time_t cutoff) +{ + size_t bytes = 0; + clientmap_entry_t **ent, **ent_next; + + for (ent = HT_START(clientmap, &client_history); ent; ent = ent_next) { + clientmap_entry_t *entry = *ent; + if (entry->last_seen_in_minutes < (cutoff / 60)) { + ent_next = HT_NEXT_RMV(clientmap, &client_history, ent); + bytes += clientmap_entry_size(entry); + clientmap_entry_free(entry); + } else { + ent_next = HT_NEXT(clientmap, &client_history, ent); + } + } + return bytes; +} + +/* Below this minimum lifetime, the OOM won't cleanup any entries. */ +#define GEOIP_CLIENT_CACHE_OOM_MIN_CUTOFF (4 * 60 * 60) +/* The OOM moves the cutoff by that much every run. */ +#define GEOIP_CLIENT_CACHE_OOM_STEP (15 * 50) + +/* Cleanup the geoip client history cache called from the OOM handler. Return + * the amount of bytes removed. This can return a value below or above + * min_remove_bytes but will stop as oon as the min_remove_bytes has been + * reached. */ +size_t +geoip_client_cache_handle_oom(time_t now, size_t min_remove_bytes) +{ + time_t k; + size_t bytes_removed = 0; + + /* Our OOM handler called with 0 bytes to remove is a code flow error. */ + tor_assert(min_remove_bytes != 0); + + /* Set k to the initial cutoff of an entry. We then going to move it by step + * to try to remove as much as we can. */ + k = WRITE_STATS_INTERVAL; + + do { + time_t cutoff; + + /* If k has reached the minimum lifetime, we have to stop else we might + * remove every single entries which would be pretty bad for the DoS + * mitigation subsystem if by just filling the geoip cache, it was enough + * to trigger the OOM and clean every single entries. */ + if (k <= GEOIP_CLIENT_CACHE_OOM_MIN_CUTOFF) { + break; + } + + cutoff = now - k; + bytes_removed += oom_clean_client_entries(cutoff); + k -= GEOIP_CLIENT_CACHE_OOM_STEP; + } while (bytes_removed < min_remove_bytes); + + return bytes_removed; +} + +/* Return the total size in bytes of the client history cache. */ +size_t +geoip_client_cache_total_allocation(void) +{ + return geoip_client_history_cache_size; +} + +/** How many responses are we giving to clients requesting v3 network + * statuses? */ +static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM]; + +/** Note that we've rejected a client's request for a v3 network status + * for reason <b>reason</b> at time <b>now</b>. */ +void +geoip_note_ns_response(geoip_ns_response_t response) +{ + static int arrays_initialized = 0; + if (!get_options()->DirReqStatistics) + return; + if (!arrays_initialized) { + memset(ns_v3_responses, 0, sizeof(ns_v3_responses)); + arrays_initialized = 1; + } + tor_assert(response < GEOIP_NS_RESPONSE_NUM); + ns_v3_responses[response]++; +} + +/** Do not mention any country from which fewer than this number of IPs have + * connected. This conceivably avoids reporting information that could + * deanonymize users, though analysis is lacking. */ +#define MIN_IPS_TO_NOTE_COUNTRY 1 +/** Do not report any geoip data at all if we have fewer than this number of + * IPs to report about. */ +#define MIN_IPS_TO_NOTE_ANYTHING 1 +/** When reporting geoip data about countries, round up to the nearest + * multiple of this value. */ +#define IP_GRANULARITY 8 + +/** Helper type: used to sort per-country totals by value. */ +typedef struct c_hist_t { + char country[3]; /**< Two-letter country code. */ + unsigned total; /**< Total IP addresses seen in this country. */ +} c_hist_t; + +/** Sorting helper: return -1, 1, or 0 based on comparison of two + * geoip_ipv4_entry_t. Sort in descending order of total, and then by country + * code. */ +static int +c_hist_compare_(const void **_a, const void **_b) +{ + const c_hist_t *a = *_a, *b = *_b; + if (a->total > b->total) + return -1; + else if (a->total < b->total) + return 1; + else + return strcmp(a->country, b->country); +} + +/** When there are incomplete directory requests at the end of a 24-hour + * period, consider those requests running for longer than this timeout as + * failed, the others as still running. */ +#define DIRREQ_TIMEOUT (10*60) + +/** Entry in a map from either chan->global_identifier for direct requests + * or a unique circuit identifier for tunneled requests to request time, + * response size, and completion time of a network status request. Used to + * measure download times of requests to derive average client + * bandwidths. */ +typedef struct dirreq_map_entry_t { + HT_ENTRY(dirreq_map_entry_t) node; + /** Unique identifier for this network status request; this is either the + * chan->global_identifier of the dir channel (direct request) or a new + * locally unique identifier of a circuit (tunneled request). This ID is + * only unique among other direct or tunneled requests, respectively. */ + uint64_t dirreq_id; + unsigned int state:3; /**< State of this directory request. */ + unsigned int type:1; /**< Is this a direct or a tunneled request? */ + unsigned int completed:1; /**< Is this request complete? */ + /** When did we receive the request and started sending the response? */ + struct timeval request_time; + size_t response_size; /**< What is the size of the response in bytes? */ + struct timeval completion_time; /**< When did the request succeed? */ +} dirreq_map_entry_t; + +/** Map of all directory requests asking for v2 or v3 network statuses in + * the current geoip-stats interval. Values are + * of type *<b>dirreq_map_entry_t</b>. */ +static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map = + HT_INITIALIZER(); + +static int +dirreq_map_ent_eq(const dirreq_map_entry_t *a, + const dirreq_map_entry_t *b) +{ + return a->dirreq_id == b->dirreq_id && a->type == b->type; +} + +/* DOCDOC dirreq_map_ent_hash */ +static unsigned +dirreq_map_ent_hash(const dirreq_map_entry_t *entry) +{ + unsigned u = (unsigned) entry->dirreq_id; + u += entry->type << 20; + return u; +} + +HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, + dirreq_map_ent_eq) +HT_GENERATE2(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash, + dirreq_map_ent_eq, 0.6, tor_reallocarray_, tor_free_) + +/** Helper: Put <b>entry</b> into map of directory requests using + * <b>type</b> and <b>dirreq_id</b> as key parts. If there is + * already an entry for that key, print out a BUG warning and return. */ +static void +dirreq_map_put_(dirreq_map_entry_t *entry, dirreq_type_t type, + uint64_t dirreq_id) +{ + dirreq_map_entry_t *old_ent; + tor_assert(entry->type == type); + tor_assert(entry->dirreq_id == dirreq_id); + + /* XXXX we could switch this to HT_INSERT some time, since it seems that + * this bug doesn't happen. But since this function doesn't seem to be + * critical-path, it's sane to leave it alone. */ + old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry); + if (old_ent && old_ent != entry) { + log_warn(LD_BUG, "Error when putting directory request into local " + "map. There was already an entry for the same identifier."); + return; + } +} + +/** Helper: Look up and return an entry in the map of directory requests + * using <b>type</b> and <b>dirreq_id</b> as key parts. If there + * is no such entry, return NULL. */ +static dirreq_map_entry_t * +dirreq_map_get_(dirreq_type_t type, uint64_t dirreq_id) +{ + dirreq_map_entry_t lookup; + lookup.type = type; + lookup.dirreq_id = dirreq_id; + return HT_FIND(dirreqmap, &dirreq_map, &lookup); +} + +/** Note that an either direct or tunneled (see <b>type</b>) directory + * request for a v3 network status with unique ID <b>dirreq_id</b> of size + * <b>response_size</b> has started. */ +void +geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, + dirreq_type_t type) +{ + dirreq_map_entry_t *ent; + if (!get_options()->DirReqStatistics) + return; + ent = tor_malloc_zero(sizeof(dirreq_map_entry_t)); + ent->dirreq_id = dirreq_id; + tor_gettimeofday(&ent->request_time); + ent->response_size = response_size; + ent->type = type; + dirreq_map_put_(ent, type, dirreq_id); +} + +/** Change the state of the either direct or tunneled (see <b>type</b>) + * directory request with <b>dirreq_id</b> to <b>new_state</b> and + * possibly mark it as completed. If no entry can be found for the given + * key parts (e.g., if this is a directory request that we are not + * measuring, or one that was started in the previous measurement period), + * or if the state cannot be advanced to <b>new_state</b>, do nothing. */ +void +geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, + dirreq_state_t new_state) +{ + dirreq_map_entry_t *ent; + if (!get_options()->DirReqStatistics) + return; + ent = dirreq_map_get_(type, dirreq_id); + if (!ent) + return; + if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS) + return; + if (new_state - 1 != ent->state) + return; + ent->state = new_state; + if ((type == DIRREQ_DIRECT && + new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) || + (type == DIRREQ_TUNNELED && + new_state == DIRREQ_CHANNEL_BUFFER_FLUSHED)) { + tor_gettimeofday(&ent->completion_time); + ent->completed = 1; + } +} + +/** Return the bridge-ip-transports string that should be inserted in + * our extra-info descriptor. Return NULL if the bridge-ip-transports + * line should be empty. */ +char * +geoip_get_transport_history(void) +{ + unsigned granularity = IP_GRANULARITY; + /** String hash table (name of transport) -> (number of users). */ + strmap_t *transport_counts = strmap_new(); + + /** Smartlist that contains copies of the names of the transports + that have been used. */ + smartlist_t *transports_used = smartlist_new(); + + /* Special string to signify that no transport was used for this + connection. Pluggable transport names can't have symbols in their + names, so this string will never collide with a real transport. */ + static const char* no_transport_str = "<OR>"; + + clientmap_entry_t **ent; + smartlist_t *string_chunks = smartlist_new(); + char *the_string = NULL; + + /* If we haven't seen any clients yet, return NULL. */ + if (HT_EMPTY(&client_history)) + goto done; + + /** We do the following steps to form the transport history string: + * a) Foreach client that uses a pluggable transport, we increase the + * times that transport was used by one. If the client did not use + * a transport, we increase the number of times someone connected + * without obfuscation. + * b) Foreach transport we observed, we write its transport history + * string and push it to string_chunks. So, for example, if we've + * seen 665 obfs2 clients, we write "obfs2=665". + * c) We concatenate string_chunks to form the final string. + */ + + log_debug(LD_GENERAL,"Starting iteration for transport history. %d clients.", + HT_SIZE(&client_history)); + + /* Loop through all clients. */ + HT_FOREACH(ent, clientmap, &client_history) { + uintptr_t val; + void *ptr; + const char *transport_name = (*ent)->transport_name; + if (!transport_name) + transport_name = no_transport_str; + + /* Increase the count for this transport name. */ + ptr = strmap_get(transport_counts, transport_name); + val = (uintptr_t)ptr; + val++; + ptr = (void*)val; + strmap_set(transport_counts, transport_name, ptr); + + /* If it's the first time we see this transport, note it. */ + if (val == 1) + smartlist_add_strdup(transports_used, transport_name); + + log_debug(LD_GENERAL, "Client from '%s' with transport '%s'. " + "I've now seen %d clients.", + safe_str_client(fmt_addr(&(*ent)->addr)), + transport_name ? transport_name : "<no transport>", + (int)val); + } + + /* Sort the transport names (helps with unit testing). */ + smartlist_sort_strings(transports_used); + + /* Loop through all seen transports. */ + SMARTLIST_FOREACH_BEGIN(transports_used, const char *, transport_name) { + void *transport_count_ptr = strmap_get(transport_counts, transport_name); + uintptr_t transport_count = (uintptr_t) transport_count_ptr; + + log_debug(LD_GENERAL, "We got %"PRIu64" clients with transport '%s'.", + ((uint64_t)transport_count), transport_name); + + smartlist_add_asprintf(string_chunks, "%s=%"PRIu64, + transport_name, + (round_uint64_to_next_multiple_of( + (uint64_t)transport_count, + granularity))); + } SMARTLIST_FOREACH_END(transport_name); + + the_string = smartlist_join_strings(string_chunks, ",", 0, NULL); + + log_debug(LD_GENERAL, "Final bridge-ip-transports string: '%s'", the_string); + + done: + strmap_free(transport_counts, NULL); + SMARTLIST_FOREACH(transports_used, char *, s, tor_free(s)); + smartlist_free(transports_used); + SMARTLIST_FOREACH(string_chunks, char *, s, tor_free(s)); + smartlist_free(string_chunks); + + return the_string; +} + +/** Return a newly allocated comma-separated string containing statistics + * on network status downloads. The string contains the number of completed + * requests, timeouts, and still running requests as well as the download + * times by deciles and quartiles. Return NULL if we have not observed + * requests for long enough. */ +static char * +geoip_get_dirreq_history(dirreq_type_t type) +{ + char *result = NULL; + buf_t *buf = NULL; + smartlist_t *dirreq_completed = NULL; + uint32_t complete = 0, timeouts = 0, running = 0; + dirreq_map_entry_t **ptr, **next; + struct timeval now; + + tor_gettimeofday(&now); + dirreq_completed = smartlist_new(); + for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) { + dirreq_map_entry_t *ent = *ptr; + if (ent->type != type) { + next = HT_NEXT(dirreqmap, &dirreq_map, ptr); + continue; + } else { + if (ent->completed) { + smartlist_add(dirreq_completed, ent); + complete++; + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr); + } else { + if (tv_mdiff(&ent->request_time, &now) / 1000 > DIRREQ_TIMEOUT) + timeouts++; + else + running++; + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr); + tor_free(ent); + } + } + } +#define DIR_REQ_GRANULARITY 4 + complete = round_uint32_to_next_multiple_of(complete, + DIR_REQ_GRANULARITY); + timeouts = round_uint32_to_next_multiple_of(timeouts, + DIR_REQ_GRANULARITY); + running = round_uint32_to_next_multiple_of(running, + DIR_REQ_GRANULARITY); + buf = buf_new_with_capacity(1024); + buf_add_printf(buf, "complete=%u,timeout=%u," + "running=%u", complete, timeouts, running); + +#define MIN_DIR_REQ_RESPONSES 16 + if (complete >= MIN_DIR_REQ_RESPONSES) { + uint32_t *dltimes; + /* We may have rounded 'completed' up. Here we want to use the + * real value. */ + complete = smartlist_len(dirreq_completed); + dltimes = tor_calloc(complete, sizeof(uint32_t)); + SMARTLIST_FOREACH_BEGIN(dirreq_completed, dirreq_map_entry_t *, ent) { + uint32_t bytes_per_second; + uint32_t time_diff = (uint32_t) tv_mdiff(&ent->request_time, + &ent->completion_time); + if (time_diff == 0) + time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible + * by law of nature or something, but a millisecond + * is a bit greater than "instantly" */ + bytes_per_second = (uint32_t)(1000 * ent->response_size / time_diff); + dltimes[ent_sl_idx] = bytes_per_second; + } SMARTLIST_FOREACH_END(ent); + median_uint32(dltimes, complete); /* sorts as a side effect. */ + buf_add_printf(buf, + ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u," + "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u", + dltimes[0], + dltimes[1*complete/10-1], + dltimes[2*complete/10-1], + dltimes[1*complete/4-1], + dltimes[3*complete/10-1], + dltimes[4*complete/10-1], + dltimes[5*complete/10-1], + dltimes[6*complete/10-1], + dltimes[7*complete/10-1], + dltimes[3*complete/4-1], + dltimes[8*complete/10-1], + dltimes[9*complete/10-1], + dltimes[complete-1]); + tor_free(dltimes); + } + + result = buf_extract(buf, NULL); + + SMARTLIST_FOREACH(dirreq_completed, dirreq_map_entry_t *, ent, + tor_free(ent)); + smartlist_free(dirreq_completed); + buf_free(buf); + return result; +} + +/** Store a newly allocated comma-separated string in + * *<a>country_str</a> containing entries for all the countries from + * which we've seen enough clients connect as a bridge, directory + * server, or entry guard. The entry format is cc=num where num is the + * number of IPs we've seen connecting from that country, and cc is a + * lowercased country code. *<a>country_str</a> is set to NULL if + * we're not ready to export per country data yet. + * + * Store a newly allocated comma-separated string in <a>ipver_str</a> + * containing entries for clients connecting over IPv4 and IPv6. The + * format is family=num where num is the nubmer of IPs we've seen + * connecting over that protocol family, and family is 'v4' or 'v6'. + * + * Return 0 on success and -1 if we're missing geoip data. */ +int +geoip_get_client_history(geoip_client_action_t action, + char **country_str, char **ipver_str) +{ + unsigned granularity = IP_GRANULARITY; + smartlist_t *entries = NULL; + int n_countries = geoip_get_n_countries(); + int i; + clientmap_entry_t **cm_ent; + unsigned *counts = NULL; + unsigned total = 0; + unsigned ipv4_count = 0, ipv6_count = 0; + + if (!geoip_is_loaded(AF_INET) && !geoip_is_loaded(AF_INET6)) + return -1; + + counts = tor_calloc(n_countries, sizeof(unsigned)); + HT_FOREACH(cm_ent, clientmap, &client_history) { + int country; + if ((*cm_ent)->action != (int)action) + continue; + country = geoip_get_country_by_addr(&(*cm_ent)->addr); + if (country < 0) + country = 0; /** unresolved requests are stored at index 0. */ + tor_assert(0 <= country && country < n_countries); + ++counts[country]; + ++total; + switch (tor_addr_family(&(*cm_ent)->addr)) { + case AF_INET: + ipv4_count++; + break; + case AF_INET6: + ipv6_count++; + break; + } + } + if (ipver_str) { + smartlist_t *chunks = smartlist_new(); + smartlist_add_asprintf(chunks, "v4=%u", + round_to_next_multiple_of(ipv4_count, granularity)); + smartlist_add_asprintf(chunks, "v6=%u", + round_to_next_multiple_of(ipv6_count, granularity)); + *ipver_str = smartlist_join_strings(chunks, ",", 0, NULL); + SMARTLIST_FOREACH(chunks, char *, c, tor_free(c)); + smartlist_free(chunks); + } + + /* Don't record per country data if we haven't seen enough IPs. */ + if (total < MIN_IPS_TO_NOTE_ANYTHING) { + tor_free(counts); + if (country_str) + *country_str = NULL; + return 0; + } + + /* Make a list of c_hist_t */ + entries = smartlist_new(); + for (i = 0; i < n_countries; ++i) { + unsigned c = counts[i]; + const char *countrycode; + c_hist_t *ent; + /* Only report a country if it has a minimum number of IPs. */ + if (c >= MIN_IPS_TO_NOTE_COUNTRY) { + c = round_to_next_multiple_of(c, granularity); + countrycode = geoip_get_country_name(i); + ent = tor_malloc(sizeof(c_hist_t)); + strlcpy(ent->country, countrycode, sizeof(ent->country)); + ent->total = c; + smartlist_add(entries, ent); + } + } + /* Sort entries. Note that we must do this _AFTER_ rounding, or else + * the sort order could leak info. */ + smartlist_sort(entries, c_hist_compare_); + + if (country_str) { + smartlist_t *chunks = smartlist_new(); + SMARTLIST_FOREACH(entries, c_hist_t *, ch, { + smartlist_add_asprintf(chunks, "%s=%u", ch->country, ch->total); + }); + *country_str = smartlist_join_strings(chunks, ",", 0, NULL); + SMARTLIST_FOREACH(chunks, char *, c, tor_free(c)); + smartlist_free(chunks); + } + + SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c)); + smartlist_free(entries); + tor_free(counts); + + return 0; +} + +/** Return a newly allocated string holding the per-country request history + * for v3 network statuses in a format suitable for an extra-info document, + * or NULL on failure. */ +char * +geoip_get_request_history(void) +{ + smartlist_t *entries, *strings; + char *result; + unsigned granularity = IP_GRANULARITY; + + if (!geoip_countries) + return NULL; + + entries = smartlist_new(); + SMARTLIST_FOREACH_BEGIN(geoip_countries, geoip_country_t *, c) { + uint32_t tot = 0; + c_hist_t *ent; + tot = c->n_v3_ns_requests; + if (!tot) + continue; + ent = tor_malloc_zero(sizeof(c_hist_t)); + strlcpy(ent->country, c->countrycode, sizeof(ent->country)); + ent->total = round_to_next_multiple_of(tot, granularity); + smartlist_add(entries, ent); + } SMARTLIST_FOREACH_END(c); + smartlist_sort(entries, c_hist_compare_); + + strings = smartlist_new(); + SMARTLIST_FOREACH(entries, c_hist_t *, ent, { + smartlist_add_asprintf(strings, "%s=%u", ent->country, ent->total); + }); + result = smartlist_join_strings(strings, ",", 0, NULL); + SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent)); + smartlist_free(strings); + smartlist_free(entries); + return result; +} + +/** Start time of directory request stats or 0 if we're not collecting + * directory request statistics. */ +static time_t start_of_dirreq_stats_interval; + +/** Initialize directory request stats. */ +void +geoip_dirreq_stats_init(time_t now) +{ + start_of_dirreq_stats_interval = now; +} + +/** Reset counters for dirreq stats. */ +void +geoip_reset_dirreq_stats(time_t now) +{ + SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, { + c->n_v3_ns_requests = 0; + }); + { + clientmap_entry_t **ent, **next, *this; + for (ent = HT_START(clientmap, &client_history); ent != NULL; + ent = next) { + if ((*ent)->action == GEOIP_CLIENT_NETWORKSTATUS) { + this = *ent; + next = HT_NEXT_RMV(clientmap, &client_history, ent); + clientmap_entry_free(this); + } else { + next = HT_NEXT(clientmap, &client_history, ent); + } + } + } + memset(ns_v3_responses, 0, sizeof(ns_v3_responses)); + { + dirreq_map_entry_t **ent, **next, *this; + for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent); + tor_free(this); + } + } + start_of_dirreq_stats_interval = now; +} + +/** Stop collecting directory request stats in a way that we can re-start + * doing so in geoip_dirreq_stats_init(). */ +void +geoip_dirreq_stats_term(void) +{ + geoip_reset_dirreq_stats(0); +} + +/** Return a newly allocated string containing the dirreq statistics + * until <b>now</b>, or NULL if we're not collecting dirreq stats. Caller + * must ensure start_of_dirreq_stats_interval is in the past. */ +char * +geoip_format_dirreq_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + int i; + char *v3_ips_string = NULL, *v3_reqs_string = NULL, + *v3_direct_dl_string = NULL, *v3_tunneled_dl_string = NULL; + char *result = NULL; + + if (!start_of_dirreq_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_dirreq_stats_interval); + + format_iso_time(t, now); + geoip_get_client_history(GEOIP_CLIENT_NETWORKSTATUS, &v3_ips_string, NULL); + v3_reqs_string = geoip_get_request_history(); + +#define RESPONSE_GRANULARITY 8 + for (i = 0; i < GEOIP_NS_RESPONSE_NUM; i++) { + ns_v3_responses[i] = round_uint32_to_next_multiple_of( + ns_v3_responses[i], RESPONSE_GRANULARITY); + } +#undef RESPONSE_GRANULARITY + + v3_direct_dl_string = geoip_get_dirreq_history(DIRREQ_DIRECT); + v3_tunneled_dl_string = geoip_get_dirreq_history(DIRREQ_TUNNELED); + + /* Put everything together into a single string. */ + tor_asprintf(&result, "dirreq-stats-end %s (%d s)\n" + "dirreq-v3-ips %s\n" + "dirreq-v3-reqs %s\n" + "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u," + "not-found=%u,not-modified=%u,busy=%u\n" + "dirreq-v3-direct-dl %s\n" + "dirreq-v3-tunneled-dl %s\n", + t, + (unsigned) (now - start_of_dirreq_stats_interval), + v3_ips_string ? v3_ips_string : "", + v3_reqs_string ? v3_reqs_string : "", + ns_v3_responses[GEOIP_SUCCESS], + ns_v3_responses[GEOIP_REJECT_NOT_ENOUGH_SIGS], + ns_v3_responses[GEOIP_REJECT_UNAVAILABLE], + ns_v3_responses[GEOIP_REJECT_NOT_FOUND], + ns_v3_responses[GEOIP_REJECT_NOT_MODIFIED], + ns_v3_responses[GEOIP_REJECT_BUSY], + v3_direct_dl_string ? v3_direct_dl_string : "", + v3_tunneled_dl_string ? v3_tunneled_dl_string : ""); + + /* Free partial strings. */ + tor_free(v3_ips_string); + tor_free(v3_reqs_string); + tor_free(v3_direct_dl_string); + tor_free(v3_tunneled_dl_string); + + return result; +} + +/** If 24 hours have passed since the beginning of the current dirreq + * stats period, write dirreq stats to $DATADIR/stats/dirreq-stats + * (possibly overwriting an existing file) and reset counters. Return + * when we would next want to write dirreq stats or 0 if we never want to + * write. */ +time_t +geoip_dirreq_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_dirreq_stats_interval) + return 0; /* Not initialized. */ + if (start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write. */ + + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_dirreq_stats_interval); + + /* Generate history string .*/ + str = geoip_format_dirreq_stats(now); + if (! str) + goto done; + + /* Write dirreq-stats string to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "dirreq-stats", str, "dirreq statistics"); + /* Reset measurement interval start. */ + geoip_reset_dirreq_stats(now); + } + + done: + tor_free(str); + return start_of_dirreq_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Start time of bridge stats or 0 if we're not collecting bridge + * statistics. */ +static time_t start_of_bridge_stats_interval; + +/** Initialize bridge stats. */ +void +geoip_bridge_stats_init(time_t now) +{ + start_of_bridge_stats_interval = now; +} + +/** Stop collecting bridge stats in a way that we can re-start doing so in + * geoip_bridge_stats_init(). */ +void +geoip_bridge_stats_term(void) +{ + client_history_clear(); + start_of_bridge_stats_interval = 0; +} + +/** Validate a bridge statistics string as it would be written to a + * current extra-info descriptor. Return 1 if the string is valid and + * recent enough, or 0 otherwise. */ +static int +validate_bridge_stats(const char *stats_str, time_t now) +{ + char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1], + *eos; + + const char *BRIDGE_STATS_END = "bridge-stats-end "; + const char *BRIDGE_IPS = "bridge-ips "; + const char *BRIDGE_IPS_EMPTY_LINE = "bridge-ips\n"; + const char *BRIDGE_TRANSPORTS = "bridge-ip-transports "; + const char *BRIDGE_TRANSPORTS_EMPTY_LINE = "bridge-ip-transports\n"; + const char *tmp; + time_t stats_end_time; + int seconds; + tor_assert(stats_str); + + /* Parse timestamp and number of seconds from + "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_STATS_END); + if (!tmp) + return 0; + tmp += strlen(BRIDGE_STATS_END); + + if (strlen(tmp) < ISO_TIME_LEN + 6) + return 0; + strlcpy(stats_end_str, tmp, sizeof(stats_end_str)); + if (parse_iso_time(stats_end_str, &stats_end_time) < 0) + return 0; + if (stats_end_time < now - (25*60*60) || + stats_end_time > now + (1*60*60)) + return 0; + seconds = (int)strtol(tmp + ISO_TIME_LEN + 2, &eos, 10); + if (!eos || seconds < 23*60*60) + return 0; + format_iso_time(stats_start_str, stats_end_time - seconds); + + /* Parse: "bridge-ips CC=N,CC=N,..." */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS); + if (!tmp) { + /* Look if there is an empty "bridge-ips" line */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS_EMPTY_LINE); + if (!tmp) + return 0; + } + + /* Parse: "bridge-ip-transports PT=N,PT=N,..." */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS); + if (!tmp) { + /* Look if there is an empty "bridge-ip-transports" line */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_TRANSPORTS_EMPTY_LINE); + if (!tmp) + return 0; + } + + return 1; +} + +/** Most recent bridge statistics formatted to be written to extra-info + * descriptors. */ +static char *bridge_stats_extrainfo = NULL; + +/** Return a newly allocated string holding our bridge usage stats by country + * in a format suitable for inclusion in an extrainfo document. Return NULL on + * failure. */ +char * +geoip_format_bridge_stats(time_t now) +{ + char *out = NULL; + char *country_data = NULL, *ipver_data = NULL, *transport_data = NULL; + long duration = now - start_of_bridge_stats_interval; + char written[ISO_TIME_LEN+1]; + + if (duration < 0) + return NULL; + if (!start_of_bridge_stats_interval) + return NULL; /* Not initialized. */ + + format_iso_time(written, now); + geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data); + transport_data = geoip_get_transport_history(); + + tor_asprintf(&out, + "bridge-stats-end %s (%ld s)\n" + "bridge-ips %s\n" + "bridge-ip-versions %s\n" + "bridge-ip-transports %s\n", + written, duration, + country_data ? country_data : "", + ipver_data ? ipver_data : "", + transport_data ? transport_data : ""); + tor_free(country_data); + tor_free(ipver_data); + tor_free(transport_data); + + return out; +} + +/** Return a newly allocated string holding our bridge usage stats by country + * in a format suitable for the answer to a controller request. Return NULL on + * failure. */ +static char * +format_bridge_stats_controller(time_t now) +{ + char *out = NULL, *country_data = NULL, *ipver_data = NULL; + char started[ISO_TIME_LEN+1]; + (void) now; + + format_iso_time(started, start_of_bridge_stats_interval); + geoip_get_client_history(GEOIP_CLIENT_CONNECT, &country_data, &ipver_data); + + tor_asprintf(&out, + "TimeStarted=\"%s\" CountrySummary=%s IPVersions=%s", + started, + country_data ? country_data : "", + ipver_data ? ipver_data : ""); + tor_free(country_data); + tor_free(ipver_data); + return out; +} + +/** Return a newly allocated string holding our bridge usage stats by + * country in a format suitable for inclusion in our heartbeat + * message. Return NULL on failure. */ +char * +format_client_stats_heartbeat(time_t now) +{ + const int n_hours = 6; + char *out = NULL; + int n_clients = 0; + clientmap_entry_t **ent; + unsigned cutoff = (unsigned)( (now-n_hours*3600)/60 ); + + if (!start_of_bridge_stats_interval) + return NULL; /* Not initialized. */ + + /* count unique IPs */ + HT_FOREACH(ent, clientmap, &client_history) { + /* only count directly connecting clients */ + if ((*ent)->action != GEOIP_CLIENT_CONNECT) + continue; + if ((*ent)->last_seen_in_minutes < cutoff) + continue; + n_clients++; + } + + tor_asprintf(&out, "Heartbeat: " + "In the last %d hours, I have seen %d unique clients.", + n_hours, + n_clients); + + return out; +} + +/** Write bridge statistics to $DATADIR/stats/bridge-stats and return + * when we should next try to write statistics. */ +time_t +geoip_bridge_stats_write(time_t now) +{ + char *val = NULL; + + /* Check if 24 hours have passed since starting measurements. */ + if (now < start_of_bridge_stats_interval + WRITE_STATS_INTERVAL) + return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL; + + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_bridge_stats_interval); + + /* Generate formatted string */ + val = geoip_format_bridge_stats(now); + if (val == NULL) + goto done; + + /* Update the stored value. */ + tor_free(bridge_stats_extrainfo); + bridge_stats_extrainfo = val; + start_of_bridge_stats_interval = now; + + /* Write it to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "bridge-stats", + bridge_stats_extrainfo, "bridge statistics"); + + /* Tell the controller, "hey, there are clients!" */ + { + char *controller_str = format_bridge_stats_controller(now); + if (controller_str) + control_event_clients_seen(controller_str); + tor_free(controller_str); + } + } + + done: + return start_of_bridge_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Try to load the most recent bridge statistics from disk, unless we + * have finished a measurement interval lately, and check whether they + * are still recent enough. */ +static void +load_bridge_stats(time_t now) +{ + char *fname, *contents; + if (bridge_stats_extrainfo) + return; + + fname = get_datadir_fname2("stats", "bridge-stats"); + contents = read_file_to_str(fname, RFTS_IGNORE_MISSING, NULL); + if (contents && validate_bridge_stats(contents, now)) { + bridge_stats_extrainfo = contents; + } else { + tor_free(contents); + } + + tor_free(fname); +} + +/** Return most recent bridge statistics for inclusion in extra-info + * descriptors, or NULL if we don't have recent bridge statistics. */ +const char * +geoip_get_bridge_stats_extrainfo(time_t now) +{ + load_bridge_stats(now); + return bridge_stats_extrainfo; +} + +/** Return a new string containing the recent bridge statistics to be returned + * to controller clients, or NULL if we don't have any bridge statistics. */ +char * +geoip_get_bridge_stats_controller(time_t now) +{ + return format_bridge_stats_controller(now); +} + +/** Start time of entry stats or 0 if we're not collecting entry + * statistics. */ +static time_t start_of_entry_stats_interval; + +/** Initialize entry stats. */ +void +geoip_entry_stats_init(time_t now) +{ + start_of_entry_stats_interval = now; +} + +/** Reset counters for entry stats. */ +void +geoip_reset_entry_stats(time_t now) +{ + client_history_clear(); + start_of_entry_stats_interval = now; +} + +/** Stop collecting entry stats in a way that we can re-start doing so in + * geoip_entry_stats_init(). */ +void +geoip_entry_stats_term(void) +{ + geoip_reset_entry_stats(0); +} + +/** Return a newly allocated string containing the entry statistics + * until <b>now</b>, or NULL if we're not collecting entry stats. Caller + * must ensure start_of_entry_stats_interval lies in the past. */ +char * +geoip_format_entry_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *data = NULL; + char *result; + + if (!start_of_entry_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_entry_stats_interval); + + geoip_get_client_history(GEOIP_CLIENT_CONNECT, &data, NULL); + format_iso_time(t, now); + tor_asprintf(&result, + "entry-stats-end %s (%u s)\n" + "entry-ips %s\n", + t, (unsigned) (now - start_of_entry_stats_interval), + data ? data : ""); + tor_free(data); + return result; +} + +/** If 24 hours have passed since the beginning of the current entry stats + * period, write entry stats to $DATADIR/stats/entry-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write entry stats or 0 if we never want to write. */ +time_t +geoip_entry_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_entry_stats_interval) + return 0; /* Not initialized. */ + if (start_of_entry_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write. */ + + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_entry_stats_interval); + + /* Generate history string .*/ + str = geoip_format_entry_stats(now); + + /* Write entry-stats string to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "entry-stats", str, "entry statistics"); + + /* Reset measurement interval start. */ + geoip_reset_entry_stats(now); + } + + done: + tor_free(str); + return start_of_entry_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Helper used to implement GETINFO ip-to-country/... controller command. */ +int +getinfo_helper_geoip(control_connection_t *control_conn, + const char *question, char **answer, + const char **errmsg) +{ + (void)control_conn; + if (!strcmpstart(question, "ip-to-country/")) { + int c; + sa_family_t family; + tor_addr_t addr; + question += strlen("ip-to-country/"); + + if (!strcmp(question, "ipv4-available") || + !strcmp(question, "ipv6-available")) { + family = !strcmp(question, "ipv4-available") ? AF_INET : AF_INET6; + const int available = geoip_is_loaded(family); + tor_asprintf(answer, "%d", !! available); + return 0; + } + + family = tor_addr_parse(&addr, question); + if (family != AF_INET && family != AF_INET6) { + *errmsg = "Invalid address family"; + return -1; + } + if (!geoip_is_loaded(family)) { + *errmsg = "GeoIP data not loaded"; + return -1; + } + if (family == AF_INET) + c = geoip_get_country_by_ipv4(tor_addr_to_ipv4h(&addr)); + else /* AF_INET6 */ + c = geoip_get_country_by_ipv6(tor_addr_to_in6(&addr)); + *answer = tor_strdup(geoip_get_country_name(c)); + } + return 0; +} + +/** Release all storage held by the GeoIP databases and country list. */ +STATIC void +clear_geoip_db(void) +{ + if (geoip_countries) { + SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c)); + smartlist_free(geoip_countries); + } + + strmap_free(country_idxplus1_by_lc_code, NULL); + if (geoip_ipv4_entries) { + SMARTLIST_FOREACH(geoip_ipv4_entries, geoip_ipv4_entry_t *, ent, + tor_free(ent)); + smartlist_free(geoip_ipv4_entries); + } + if (geoip_ipv6_entries) { + SMARTLIST_FOREACH(geoip_ipv6_entries, geoip_ipv6_entry_t *, ent, + tor_free(ent)); + smartlist_free(geoip_ipv6_entries); + } + geoip_countries = NULL; + country_idxplus1_by_lc_code = NULL; + geoip_ipv4_entries = NULL; + geoip_ipv6_entries = NULL; +} + +/** Release all storage held in this file. */ +void +geoip_free_all(void) +{ + { + clientmap_entry_t **ent, **next, *this; + for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(clientmap, &client_history, ent); + clientmap_entry_free(this); + } + HT_CLEAR(clientmap, &client_history); + } + { + dirreq_map_entry_t **ent, **next, *this; + for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) { + this = *ent; + next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent); + tor_free(this); + } + HT_CLEAR(dirreqmap, &dirreq_map); + } + + clear_geoip_db(); + tor_free(bridge_stats_extrainfo); + + memset(geoip_digest, 0, sizeof(geoip_digest)); + memset(geoip6_digest, 0, sizeof(geoip6_digest)); +} diff --git a/src/feature/stats/geoip.h b/src/feature/stats/geoip.h new file mode 100644 index 0000000000..fd19b7560a --- /dev/null +++ b/src/feature/stats/geoip.h @@ -0,0 +1,157 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file geoip.h + * \brief Header file for geoip.c. + **/ + +#ifndef TOR_GEOIP_H +#define TOR_GEOIP_H + +#include "lib/testsupport/testsupport.h" +#include "or/dos.h" + +/** Indicates an action that we might be noting geoip statistics on. + * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing + * the others, we're not. + */ +typedef enum { + /** We've noticed a connection as a bridge relay or entry guard. */ + GEOIP_CLIENT_CONNECT = 0, + /** We've served a networkstatus consensus as a directory server. */ + GEOIP_CLIENT_NETWORKSTATUS = 1, +} geoip_client_action_t; +/** Indicates either a positive reply or a reason for rejectng a network + * status request that will be included in geoip statistics. */ +typedef enum { + /** Request is answered successfully. */ + GEOIP_SUCCESS = 0, + /** V3 network status is not signed by a sufficient number of requested + * authorities. */ + GEOIP_REJECT_NOT_ENOUGH_SIGS = 1, + /** Requested network status object is unavailable. */ + GEOIP_REJECT_UNAVAILABLE = 2, + /** Requested network status not found. */ + GEOIP_REJECT_NOT_FOUND = 3, + /** Network status has not been modified since If-Modified-Since time. */ + GEOIP_REJECT_NOT_MODIFIED = 4, + /** Directory is busy. */ + GEOIP_REJECT_BUSY = 5, +} geoip_ns_response_t; +#define GEOIP_NS_RESPONSE_NUM 6 + +/** Directory requests that we are measuring can be either direct or + * tunneled. */ +typedef enum { + DIRREQ_DIRECT = 0, + DIRREQ_TUNNELED = 1, +} dirreq_type_t; + +/** Possible states for either direct or tunneled directory requests that + * are relevant for determining network status download times. */ +typedef enum { + /** Found that the client requests a network status; applies to both + * direct and tunneled requests; initial state of a request that we are + * measuring. */ + DIRREQ_IS_FOR_NETWORK_STATUS = 0, + /** Finished writing a network status to the directory connection; + * applies to both direct and tunneled requests; completes a direct + * request. */ + DIRREQ_FLUSHING_DIR_CONN_FINISHED = 1, + /** END cell sent to circuit that initiated a tunneled request. */ + DIRREQ_END_CELL_SENT = 2, + /** Flushed last cell from queue of the circuit that initiated a + * tunneled request to the outbuf of the OR connection. */ + DIRREQ_CIRC_QUEUE_FLUSHED = 3, + /** Flushed last byte from buffer of the channel belonging to the + * circuit that initiated a tunneled request; completes a tunneled + * request. */ + DIRREQ_CHANNEL_BUFFER_FLUSHED = 4 +} dirreq_state_t; + +#ifdef GEOIP_PRIVATE +STATIC int geoip_parse_entry(const char *line, sa_family_t family); +STATIC int geoip_get_country_by_ipv4(uint32_t ipaddr); +STATIC int geoip_get_country_by_ipv6(const struct in6_addr *addr); +STATIC void clear_geoip_db(void); +#endif /* defined(GEOIP_PRIVATE) */ + +/** Entry in a map from IP address to the last time we've seen an incoming + * connection from that IP address. Used by bridges only to track which + * countries have them blocked, or the DoS mitigation subsystem if enabled. */ +typedef struct clientmap_entry_t { + HT_ENTRY(clientmap_entry_t) node; + tor_addr_t addr; + /* Name of pluggable transport used by this client. NULL if no + pluggable transport was used. */ + char *transport_name; + + /** Time when we last saw this IP address, in MINUTES since the epoch. + * + * (This will run out of space around 4011 CE. If Tor is still in use around + * 4000 CE, please remember to add more bits to last_seen_in_minutes.) */ + unsigned int last_seen_in_minutes:30; + unsigned int action:2; + + /* This object is used to keep some statistics per client address for the + * DoS mitigation subsystem. */ + dos_client_stats_t dos_stats; +} clientmap_entry_t; + +int should_record_bridge_info(const or_options_t *options); +int geoip_load_file(sa_family_t family, const char *filename); +MOCK_DECL(int, geoip_get_country_by_addr, (const tor_addr_t *addr)); +MOCK_DECL(int, geoip_get_n_countries, (void)); +const char *geoip_get_country_name(country_t num); +MOCK_DECL(int, geoip_is_loaded, (sa_family_t family)); +const char *geoip_db_digest(sa_family_t family); +MOCK_DECL(country_t, geoip_get_country, (const char *countrycode)); + +void geoip_note_client_seen(geoip_client_action_t action, + const tor_addr_t *addr, const char *transport_name, + time_t now); +void geoip_remove_old_clients(time_t cutoff); +clientmap_entry_t *geoip_lookup_client(const tor_addr_t *addr, + const char *transport_name, + geoip_client_action_t action); +size_t geoip_client_cache_total_allocation(void); +size_t geoip_client_cache_handle_oom(time_t now, size_t min_remove_bytes); + +void geoip_note_ns_response(geoip_ns_response_t response); +char *geoip_get_transport_history(void); +int geoip_get_client_history(geoip_client_action_t action, + char **country_str, char **ipver_str); +char *geoip_get_request_history(void); +int getinfo_helper_geoip(control_connection_t *control_conn, + const char *question, char **answer, + const char **errmsg); +void geoip_free_all(void); + +void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size, + dirreq_type_t type); +void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type, + dirreq_state_t new_state); + +void geoip_dirreq_stats_init(time_t now); +void geoip_reset_dirreq_stats(time_t now); +char *geoip_format_dirreq_stats(time_t now); +time_t geoip_dirreq_stats_write(time_t now); +void geoip_dirreq_stats_term(void); +void geoip_entry_stats_init(time_t now); +time_t geoip_entry_stats_write(time_t now); +void geoip_entry_stats_term(void); +void geoip_reset_entry_stats(time_t now); +char *geoip_format_entry_stats(time_t now); +void geoip_bridge_stats_init(time_t now); +char *geoip_format_bridge_stats(time_t now); +time_t geoip_bridge_stats_write(time_t now); +void geoip_bridge_stats_term(void); +const char *geoip_get_bridge_stats_extrainfo(time_t); +char *geoip_get_bridge_stats_controller(time_t); +char *format_client_stats_heartbeat(time_t now); + +#endif /* !defined(TOR_GEOIP_H) */ diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c new file mode 100644 index 0000000000..6607c25964 --- /dev/null +++ b/src/feature/stats/rephist.c @@ -0,0 +1,3220 @@ +/* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file rephist.c + * \brief Basic history and performance-tracking functionality. + * + * Basic history and performance-tracking functionality to remember + * which servers have worked in the past, how much bandwidth we've + * been using, which ports we tend to want, and so on; further, + * exit port statistics, cell statistics, and connection statistics. + * + * The history and information tracked in this module could sensibly be + * divided into several categories: + * + * <ul><li>Statistics used by authorities to remember the uptime and + * stability information about various relays, including "uptime", + * "weighted fractional uptime" and "mean time between failures". + * + * <li>Bandwidth usage history, used by relays to self-report how much + * bandwidth they've used for different purposes over last day or so, + * in order to generate the {dirreq-,}{read,write}-history lines in + * that they publish. + * + * <li>Predicted ports, used by clients to remember how long it's been + * since they opened an exit connection to each given target + * port. Clients use this information in order to try to keep circuits + * open to exit nodes that can connect to the ports that they care + * about. (The predicted ports mechanism also handles predicted circuit + * usage that _isn't_ port-specific, such as resolves, internal circuits, + * and so on.) + * + * <li>Public key operation counters, for tracking how many times we've + * done each public key operation. (This is unmaintained and we should + * remove it.) + * + * <li>Exit statistics by port, used by exits to keep track of the + * number of streams and bytes they've served at each exit port, so they + * can generate their exit-kibibytes-{read,written} and + * exit-streams-opened statistics. + * + * <li>Circuit stats, used by relays instances to tract circuit + * queue fullness and delay over time, and generate cell-processed-cells, + * cell-queued-cells, cell-time-in-queue, and cell-circuits-per-decile + * statistics. + * + * <li>Descriptor serving statistics, used by directory caches to track + * how many descriptors they've served. + * + * <li>Connection statistics, used by relays to track one-way and + * bidirectional connections. + * + * <li>Onion handshake statistics, used by relays to count how many + * TAP and ntor handshakes they've handled. + * + * <li>Hidden service statistics, used by relays to count rendezvous + * traffic and HSDir-stored descriptors. + * + * <li>Link protocol statistics, used by relays to count how many times + * each link protocol has been used. + * + * </ul> + * + * The entry points for this module are scattered throughout the + * codebase. Sending data, receiving data, connecting to a relay, + * losing a connection to a relay, and so on can all trigger a change in + * our current stats. Relays also invoke this module in order to + * extract their statistics when building routerinfo and extrainfo + * objects in router.c. + * + * TODO: This module should be broken up. + * + * (The "rephist" name originally stood for "reputation and history". ) + **/ + +#include "or/or.h" +#include "or/circuitlist.h" +#include "or/circuituse.h" +#include "or/config.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "or/networkstatus.h" +#include "or/nodelist.h" +#include "or/rephist.h" +#include "or/router.h" +#include "or/routerlist.h" +#include "ht.h" +#include "or/channelpadding.h" +#include "or/connection_or.h" +#include "or/statefile.h" + +#include "or/networkstatus_st.h" +#include "or/or_circuit_st.h" +#include "or/or_state_st.h" + +#include "lib/container/bloomfilt.h" +#include "lib/container/order.h" +#include "lib/math/fp.h" +#include "lib/math/laplace.h" + +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +static void bw_arrays_init(void); +static void predicted_ports_alloc(void); + +/** Total number of bytes currently allocated in fields used by rephist.c. */ +uint64_t rephist_total_alloc=0; +/** Number of or_history_t objects currently allocated. */ +uint32_t rephist_total_num=0; + +/** If the total weighted run count of all runs for a router ever falls + * below this amount, the router can be treated as having 0 MTBF. */ +#define STABILITY_EPSILON 0.0001 +/** Value by which to discount all old intervals for MTBF purposes. This + * is compounded every STABILITY_INTERVAL. */ +#define STABILITY_ALPHA 0.95 +/** Interval at which to discount all old intervals for MTBF purposes. */ +#define STABILITY_INTERVAL (12*60*60) +/* (This combination of ALPHA, INTERVAL, and EPSILON makes it so that an + * interval that just ended counts twice as much as one that ended a week ago, + * 20X as much as one that ended a month ago, and routers that have had no + * uptime data for about half a year will get forgotten.) */ + +/** History of an OR. */ +typedef struct or_history_t { + /** When did we start tracking this OR? */ + time_t since; + /** When did we most recently note a change to this OR? */ + time_t changed; + + /** The address at which we most recently connected to this OR + * successfully. */ + tor_addr_t last_reached_addr; + + /** The port at which we most recently connected to this OR successfully */ + uint16_t last_reached_port; + + /* === For MTBF tracking: */ + /** Weighted sum total of all times that this router has been online. + */ + unsigned long weighted_run_length; + /** If the router is now online (according to stability-checking rules), + * when did it come online? */ + time_t start_of_run; + /** Sum of weights for runs in weighted_run_length. */ + double total_run_weights; + /* === For fractional uptime tracking: */ + time_t start_of_downtime; + unsigned long weighted_uptime; + unsigned long total_weighted_time; +} or_history_t; + +/** + * This structure holds accounting needed to calculate the padding overhead. + */ +typedef struct padding_counts_t { + /** Total number of cells we have received, including padding */ + uint64_t read_cell_count; + /** Total number of cells we have sent, including padding */ + uint64_t write_cell_count; + /** Total number of CELL_PADDING cells we have received */ + uint64_t read_pad_cell_count; + /** Total number of CELL_PADDING cells we have sent */ + uint64_t write_pad_cell_count; + /** Total number of read cells on padding-enabled conns */ + uint64_t enabled_read_cell_count; + /** Total number of sent cells on padding-enabled conns */ + uint64_t enabled_write_cell_count; + /** Total number of read CELL_PADDING cells on padding-enabled cons */ + uint64_t enabled_read_pad_cell_count; + /** Total number of sent CELL_PADDING cells on padding-enabled cons */ + uint64_t enabled_write_pad_cell_count; + /** Total number of RELAY_DROP cells we have received */ + uint64_t read_drop_cell_count; + /** Total number of RELAY_DROP cells we have sent */ + uint64_t write_drop_cell_count; + /** The maximum number of padding timers we've seen in 24 hours */ + uint64_t maximum_chanpad_timers; + /** When did we first copy padding_current into padding_published? */ + char first_published_at[ISO_TIME_LEN+1]; +} padding_counts_t; + +/** Holds the current values of our padding statistics. + * It is not published until it is transferred to padding_published. */ +static padding_counts_t padding_current; + +/** Remains fixed for a 24 hour period, and then is replaced + * by a redacted copy of padding_current */ +static padding_counts_t padding_published; + +/** When did we last multiply all routers' weighted_run_length and + * total_run_weights by STABILITY_ALPHA? */ +static time_t stability_last_downrated = 0; + +/** */ +static time_t started_tracking_stability = 0; + +/** Map from hex OR identity digest to or_history_t. */ +static digestmap_t *history_map = NULL; + +/** Return the or_history_t for the OR with identity digest <b>id</b>, + * creating it if necessary. */ +static or_history_t * +get_or_history(const char* id) +{ + or_history_t *hist; + + if (tor_digest_is_zero(id)) + return NULL; + + hist = digestmap_get(history_map, id); + if (!hist) { + hist = tor_malloc_zero(sizeof(or_history_t)); + rephist_total_alloc += sizeof(or_history_t); + rephist_total_num++; + hist->since = hist->changed = time(NULL); + tor_addr_make_unspec(&hist->last_reached_addr); + digestmap_set(history_map, id, hist); + } + return hist; +} + +/** Helper: free storage held by a single OR history entry. */ +static void +free_or_history(void *_hist) +{ + or_history_t *hist = _hist; + rephist_total_alloc -= sizeof(or_history_t); + rephist_total_num--; + tor_free(hist); +} + +/** Initialize the static data structures for tracking history. */ +void +rep_hist_init(void) +{ + history_map = digestmap_new(); + bw_arrays_init(); + predicted_ports_alloc(); +} + +/** We have just decided that this router with identity digest <b>id</b> is + * reachable, meaning we will give it a "Running" flag for the next while. */ +void +rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + const uint16_t at_port, time_t when) +{ + or_history_t *hist = get_or_history(id); + int was_in_run = 1; + char tbuf[ISO_TIME_LEN+1]; + int addr_changed, port_changed; + + tor_assert(hist); + tor_assert((!at_addr && !at_port) || (at_addr && at_port)); + + addr_changed = at_addr && !tor_addr_is_null(&hist->last_reached_addr) && + tor_addr_compare(at_addr, &hist->last_reached_addr, CMP_EXACT) != 0; + port_changed = at_port && hist->last_reached_port && + at_port != hist->last_reached_port; + + if (!started_tracking_stability) + started_tracking_stability = time(NULL); + if (!hist->start_of_run) { + hist->start_of_run = when; + was_in_run = 0; + } + if (hist->start_of_downtime) { + long down_length; + + format_local_iso_time(tbuf, hist->start_of_downtime); + log_info(LD_HIST, "Router %s is now Running; it had been down since %s.", + hex_str(id, DIGEST_LEN), tbuf); + if (was_in_run) + log_info(LD_HIST, " (Paradoxically, it was already Running too.)"); + + down_length = when - hist->start_of_downtime; + hist->total_weighted_time += down_length; + hist->start_of_downtime = 0; + } else if (addr_changed || port_changed) { + /* If we're reachable, but the address changed, treat this as some + * downtime. */ + int penalty = get_options()->TestingTorNetwork ? 240 : 3600; + networkstatus_t *ns; + + if ((ns = networkstatus_get_latest_consensus())) { + int fresh_interval = (int)(ns->fresh_until - ns->valid_after); + int live_interval = (int)(ns->valid_until - ns->valid_after); + /* on average, a descriptor addr change takes .5 intervals to make it + * into a consensus, and half a liveness period to make it to + * clients. */ + penalty = (int)(fresh_interval + live_interval) / 2; + } + format_local_iso_time(tbuf, hist->start_of_run); + log_info(LD_HIST,"Router %s still seems Running, but its address appears " + "to have changed since the last time it was reachable. I'm " + "going to treat it as having been down for %d seconds", + hex_str(id, DIGEST_LEN), penalty); + rep_hist_note_router_unreachable(id, when-penalty); + rep_hist_note_router_reachable(id, NULL, 0, when); + } else { + format_local_iso_time(tbuf, hist->start_of_run); + if (was_in_run) + log_debug(LD_HIST, "Router %s is still Running; it has been Running " + "since %s", hex_str(id, DIGEST_LEN), tbuf); + else + log_info(LD_HIST,"Router %s is now Running; it was previously untracked", + hex_str(id, DIGEST_LEN)); + } + if (at_addr) + tor_addr_copy(&hist->last_reached_addr, at_addr); + if (at_port) + hist->last_reached_port = at_port; +} + +/** We have just decided that this router is unreachable, meaning + * we are taking away its "Running" flag. */ +void +rep_hist_note_router_unreachable(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + char tbuf[ISO_TIME_LEN+1]; + int was_running = 0; + if (!started_tracking_stability) + started_tracking_stability = time(NULL); + + tor_assert(hist); + if (hist->start_of_run) { + /*XXXX We could treat failed connections differently from failed + * connect attempts. */ + long run_length = when - hist->start_of_run; + format_local_iso_time(tbuf, hist->start_of_run); + + hist->total_run_weights += 1.0; + hist->start_of_run = 0; + if (run_length < 0) { + unsigned long penalty = -run_length; +#define SUBTRACT_CLAMPED(var, penalty) \ + do { (var) = (var) < (penalty) ? 0 : (var) - (penalty); } while (0) + + SUBTRACT_CLAMPED(hist->weighted_run_length, penalty); + SUBTRACT_CLAMPED(hist->weighted_uptime, penalty); + } else { + hist->weighted_run_length += run_length; + hist->weighted_uptime += run_length; + hist->total_weighted_time += run_length; + } + was_running = 1; + log_info(LD_HIST, "Router %s is now non-Running: it had previously been " + "Running since %s. Its total weighted uptime is %lu/%lu.", + hex_str(id, DIGEST_LEN), tbuf, hist->weighted_uptime, + hist->total_weighted_time); + } + if (!hist->start_of_downtime) { + hist->start_of_downtime = when; + + if (!was_running) + log_info(LD_HIST, "Router %s is now non-Running; it was previously " + "untracked.", hex_str(id, DIGEST_LEN)); + } else { + if (!was_running) { + format_local_iso_time(tbuf, hist->start_of_downtime); + + log_info(LD_HIST, "Router %s is still non-Running; it has been " + "non-Running since %s.", hex_str(id, DIGEST_LEN), tbuf); + } + } +} + +/** Mark a router with ID <b>id</b> as non-Running, and retroactively declare + * that it has never been running: give it no stability and no WFU. */ +void +rep_hist_make_router_pessimal(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + tor_assert(hist); + + rep_hist_note_router_unreachable(id, when); + + hist->weighted_run_length = 0; + hist->weighted_uptime = 0; +} + +/** Helper: Discount all old MTBF data, if it is time to do so. Return + * the time at which we should next discount MTBF data. */ +time_t +rep_hist_downrate_old_runs(time_t now) +{ + digestmap_iter_t *orhist_it; + const char *digest1; + or_history_t *hist; + void *hist_p; + double alpha = 1.0; + + if (!history_map) + history_map = digestmap_new(); + if (!stability_last_downrated) + stability_last_downrated = now; + if (stability_last_downrated + STABILITY_INTERVAL > now) + return stability_last_downrated + STABILITY_INTERVAL; + + /* Okay, we should downrate the data. By how much? */ + while (stability_last_downrated + STABILITY_INTERVAL < now) { + stability_last_downrated += STABILITY_INTERVAL; + alpha *= STABILITY_ALPHA; + } + + log_info(LD_HIST, "Discounting all old stability info by a factor of %f", + alpha); + + /* Multiply every w_r_l, t_r_w pair by alpha. */ + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + digestmap_iter_get(orhist_it, &digest1, &hist_p); + hist = hist_p; + + hist->weighted_run_length = + (unsigned long)(hist->weighted_run_length * alpha); + hist->total_run_weights *= alpha; + + hist->weighted_uptime = (unsigned long)(hist->weighted_uptime * alpha); + hist->total_weighted_time = (unsigned long) + (hist->total_weighted_time * alpha); + } + + return stability_last_downrated + STABILITY_INTERVAL; +} + +/** Helper: Return the weighted MTBF of the router with history <b>hist</b>. */ +static double +get_stability(or_history_t *hist, time_t when) +{ + long total = hist->weighted_run_length; + double total_weights = hist->total_run_weights; + + if (hist->start_of_run) { + /* We're currently in a run. Let total and total_weights hold the values + * they would hold if the current run were to end now. */ + total += (when-hist->start_of_run); + total_weights += 1.0; + } + if (total_weights < STABILITY_EPSILON) { + /* Round down to zero, and avoid divide-by-zero. */ + return 0.0; + } + + return total / total_weights; +} + +/** Return the total amount of time we've been observing, with each run of + * time downrated by the appropriate factor. */ +static long +get_total_weighted_time(or_history_t *hist, time_t when) +{ + long total = hist->total_weighted_time; + if (hist->start_of_run) { + total += (when - hist->start_of_run); + } else if (hist->start_of_downtime) { + total += (when - hist->start_of_downtime); + } + return total; +} + +/** Helper: Return the weighted percent-of-time-online of the router with + * history <b>hist</b>. */ +static double +get_weighted_fractional_uptime(or_history_t *hist, time_t when) +{ + long total = hist->total_weighted_time; + long up = hist->weighted_uptime; + + if (hist->start_of_run) { + long run_length = (when - hist->start_of_run); + up += run_length; + total += run_length; + } else if (hist->start_of_downtime) { + total += (when - hist->start_of_downtime); + } + + if (!total) { + /* Avoid calling anybody's uptime infinity (which should be impossible if + * the code is working), or NaN (which can happen for any router we haven't + * observed up or down yet). */ + return 0.0; + } + + return ((double) up) / total; +} + +/** Return how long the router whose identity digest is <b>id</b> has + * been reachable. Return 0 if the router is unknown or currently deemed + * unreachable. */ +long +rep_hist_get_uptime(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0; + if (!hist->start_of_run || when < hist->start_of_run) + return 0; + return when - hist->start_of_run; +} + +/** Return an estimated MTBF for the router whose identity digest is + * <b>id</b>. Return 0 if the router is unknown. */ +double +rep_hist_get_stability(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0.0; + + return get_stability(hist, when); +} + +/** Return an estimated percent-of-time-online for the router whose identity + * digest is <b>id</b>. Return 0 if the router is unknown. */ +double +rep_hist_get_weighted_fractional_uptime(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0.0; + + return get_weighted_fractional_uptime(hist, when); +} + +/** Return a number representing how long we've known about the router whose + * digest is <b>id</b>. Return 0 if the router is unknown. + * + * Be careful: this measure increases monotonically as we know the router for + * longer and longer, but it doesn't increase linearly. + */ +long +rep_hist_get_weighted_time_known(const char *id, time_t when) +{ + or_history_t *hist = get_or_history(id); + if (!hist) + return 0; + + return get_total_weighted_time(hist, when); +} + +/** Return true if we've been measuring MTBFs for long enough to + * pronounce on Stability. */ +int +rep_hist_have_measured_enough_stability(void) +{ + /* XXXX++ This doesn't do so well when we change our opinion + * as to whether we're tracking router stability. */ + return started_tracking_stability < time(NULL) - 4*60*60; +} + +/** Log all the reliability data we have remembered, with the chosen + * severity. + */ +void +rep_hist_dump_stats(time_t now, int severity) +{ + digestmap_iter_t *orhist_it; + const char *name1, *digest1; + char hexdigest1[HEX_DIGEST_LEN+1]; + or_history_t *or_history; + void *or_history_p; + const node_t *node; + + rep_history_clean(now - get_options()->RephistTrackTime); + + tor_log(severity, LD_HIST, "--------------- Dumping history information:"); + + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + double s; + long stability; + digestmap_iter_get(orhist_it, &digest1, &or_history_p); + or_history = (or_history_t*) or_history_p; + + if ((node = node_get_by_id(digest1)) && node_get_nickname(node)) + name1 = node_get_nickname(node); + else + name1 = "(unknown)"; + base16_encode(hexdigest1, sizeof(hexdigest1), digest1, DIGEST_LEN); + s = get_stability(or_history, now); + stability = (long)s; + tor_log(severity, LD_HIST, + "OR %s [%s]: wmtbf %lu:%02lu:%02lu", + name1, hexdigest1, + stability/3600, (stability/60)%60, stability%60); + } +} + +/** Remove history info for routers/links that haven't changed since + * <b>before</b>. + */ +void +rep_history_clean(time_t before) +{ + int authority = authdir_mode(get_options()); + or_history_t *or_history; + void *or_history_p; + digestmap_iter_t *orhist_it; + const char *d1; + + orhist_it = digestmap_iter_init(history_map); + while (!digestmap_iter_done(orhist_it)) { + int should_remove; + digestmap_iter_get(orhist_it, &d1, &or_history_p); + or_history = or_history_p; + + should_remove = authority ? + (or_history->total_run_weights < STABILITY_EPSILON && + !or_history->start_of_run) + : (or_history->changed < before); + if (should_remove) { + orhist_it = digestmap_iter_next_rmv(history_map, orhist_it); + free_or_history(or_history); + continue; + } + orhist_it = digestmap_iter_next(history_map, orhist_it); + } +} + +/** Write MTBF data to disk. Return 0 on success, negative on failure. + * + * If <b>missing_means_down</b>, then if we're about to write an entry + * that is still considered up but isn't in our routerlist, consider it + * to be down. */ +int +rep_hist_record_mtbf_data(time_t now, int missing_means_down) +{ + char time_buf[ISO_TIME_LEN+1]; + + digestmap_iter_t *orhist_it; + const char *digest; + void *or_history_p; + or_history_t *hist; + open_file_t *open_file = NULL; + FILE *f; + + { + char *filename = get_datadir_fname("router-stability"); + f = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE|O_TEXT, 0600, + &open_file); + tor_free(filename); + if (!f) + return -1; + } + + /* File format is: + * FormatLine *KeywordLine Data + * + * FormatLine = "format 1" NL + * KeywordLine = Keyword SP Arguments NL + * Data = "data" NL *RouterMTBFLine "." NL + * RouterMTBFLine = Fingerprint SP WeightedRunLen SP + * TotalRunWeights [SP S=StartRunTime] NL + */ +#define PUT(s) STMT_BEGIN if (fputs((s),f)<0) goto err; STMT_END +#define PRINTF(args) STMT_BEGIN if (fprintf args <0) goto err; STMT_END + + PUT("format 2\n"); + + format_iso_time(time_buf, time(NULL)); + PRINTF((f, "stored-at %s\n", time_buf)); + + if (started_tracking_stability) { + format_iso_time(time_buf, started_tracking_stability); + PRINTF((f, "tracked-since %s\n", time_buf)); + } + if (stability_last_downrated) { + format_iso_time(time_buf, stability_last_downrated); + PRINTF((f, "last-downrated %s\n", time_buf)); + } + + PUT("data\n"); + + /* XXX Nick: now bridge auths record this for all routers too. + * Should we make them record it only for bridge routers? -RD + * Not for 0.2.0. -NM */ + for (orhist_it = digestmap_iter_init(history_map); + !digestmap_iter_done(orhist_it); + orhist_it = digestmap_iter_next(history_map,orhist_it)) { + char dbuf[HEX_DIGEST_LEN+1]; + const char *t = NULL; + digestmap_iter_get(orhist_it, &digest, &or_history_p); + hist = (or_history_t*) or_history_p; + + base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN); + + if (missing_means_down && hist->start_of_run && + !connection_or_digest_is_known_relay(digest)) { + /* We think this relay is running, but it's not listed in our + * consensus. Somehow it fell out without telling us it went + * down. Complain and also correct it. */ + log_info(LD_HIST, + "Relay '%s' is listed as up in rephist, but it's not in " + "our routerlist. Correcting.", dbuf); + rep_hist_note_router_unreachable(digest, now); + } + + PRINTF((f, "R %s\n", dbuf)); + if (hist->start_of_run > 0) { + format_iso_time(time_buf, hist->start_of_run); + t = time_buf; + } + PRINTF((f, "+MTBF %lu %.5f%s%s\n", + hist->weighted_run_length, hist->total_run_weights, + t ? " S=" : "", t ? t : "")); + t = NULL; + if (hist->start_of_downtime > 0) { + format_iso_time(time_buf, hist->start_of_downtime); + t = time_buf; + } + PRINTF((f, "+WFU %lu %lu%s%s\n", + hist->weighted_uptime, hist->total_weighted_time, + t ? " S=" : "", t ? t : "")); + } + + PUT(".\n"); + +#undef PUT +#undef PRINTF + + return finish_writing_to_file(open_file); + err: + abort_writing_to_file(open_file); + return -1; +} + +/** Helper: return the first j >= i such that !strcmpstart(sl[j], prefix) and + * such that no line sl[k] with i <= k < j starts with "R ". Return -1 if no + * such line exists. */ +static int +find_next_with(smartlist_t *sl, int i, const char *prefix) +{ + for ( ; i < smartlist_len(sl); ++i) { + const char *line = smartlist_get(sl, i); + if (!strcmpstart(line, prefix)) + return i; + if (!strcmpstart(line, "R ")) + return -1; + } + return -1; +} + +/** How many bad times has parse_possibly_bad_iso_time() parsed? */ +static int n_bogus_times = 0; +/** Parse the ISO-formatted time in <b>s</b> into *<b>time_out</b>, but + * round any pre-1970 date to Jan 1, 1970. */ +static int +parse_possibly_bad_iso_time(const char *s, time_t *time_out) +{ + int year; + char b[5]; + strlcpy(b, s, sizeof(b)); + b[4] = '\0'; + year = (int)tor_parse_long(b, 10, 0, INT_MAX, NULL, NULL); + if (year < 1970) { + *time_out = 0; + ++n_bogus_times; + return 0; + } else + return parse_iso_time(s, time_out); +} + +/** We've read a time <b>t</b> from a file stored at <b>stored_at</b>, which + * says we started measuring at <b>started_measuring</b>. Return a new number + * that's about as much before <b>now</b> as <b>t</b> was before + * <b>stored_at</b>. + */ +static inline time_t +correct_time(time_t t, time_t now, time_t stored_at, time_t started_measuring) +{ + if (t < started_measuring - 24*60*60*365) + return 0; + else if (t < started_measuring) + return started_measuring; + else if (t > stored_at) + return 0; + else { + long run_length = stored_at - t; + t = (time_t)(now - run_length); + if (t < started_measuring) + t = started_measuring; + return t; + } +} + +/** Load MTBF data from disk. Returns 0 on success or recoverable error, -1 + * on failure. */ +int +rep_hist_load_mtbf_data(time_t now) +{ + /* XXXX won't handle being called while history is already populated. */ + smartlist_t *lines; + const char *line = NULL; + int r=0, i; + time_t last_downrated = 0, stored_at = 0, tracked_since = 0; + time_t latest_possible_start = now; + long format = -1; + + { + char *filename = get_datadir_fname("router-stability"); + char *d = read_file_to_str(filename, RFTS_IGNORE_MISSING, NULL); + tor_free(filename); + if (!d) + return -1; + lines = smartlist_new(); + smartlist_split_string(lines, d, "\n", SPLIT_SKIP_SPACE, 0); + tor_free(d); + } + + { + const char *firstline; + if (smartlist_len(lines)>4) { + firstline = smartlist_get(lines, 0); + if (!strcmpstart(firstline, "format ")) + format = tor_parse_long(firstline+strlen("format "), + 10, -1, LONG_MAX, NULL, NULL); + } + } + if (format != 1 && format != 2) { + log_warn(LD_HIST, + "Unrecognized format in mtbf history file. Skipping."); + goto err; + } + for (i = 1; i < smartlist_len(lines); ++i) { + line = smartlist_get(lines, i); + if (!strcmp(line, "data")) + break; + if (!strcmpstart(line, "last-downrated ")) { + if (parse_iso_time(line+strlen("last-downrated "), &last_downrated)<0) + log_warn(LD_HIST,"Couldn't parse downrate time in mtbf " + "history file."); + } + if (!strcmpstart(line, "stored-at ")) { + if (parse_iso_time(line+strlen("stored-at "), &stored_at)<0) + log_warn(LD_HIST,"Couldn't parse stored time in mtbf " + "history file."); + } + if (!strcmpstart(line, "tracked-since ")) { + if (parse_iso_time(line+strlen("tracked-since "), &tracked_since)<0) + log_warn(LD_HIST,"Couldn't parse started-tracking time in mtbf " + "history file."); + } + } + if (last_downrated > now) + last_downrated = now; + if (tracked_since > now) + tracked_since = now; + + if (!stored_at) { + log_warn(LD_HIST, "No stored time recorded."); + goto err; + } + + if (line && !strcmp(line, "data")) + ++i; + + n_bogus_times = 0; + + for (; i < smartlist_len(lines); ++i) { + char digest[DIGEST_LEN]; + char hexbuf[HEX_DIGEST_LEN+1]; + char mtbf_timebuf[ISO_TIME_LEN+1]; + char wfu_timebuf[ISO_TIME_LEN+1]; + time_t start_of_run = 0; + time_t start_of_downtime = 0; + int have_mtbf = 0, have_wfu = 0; + long wrl = 0; + double trw = 0; + long wt_uptime = 0, total_wt_time = 0; + int n; + or_history_t *hist; + line = smartlist_get(lines, i); + if (!strcmp(line, ".")) + break; + + mtbf_timebuf[0] = '\0'; + wfu_timebuf[0] = '\0'; + + if (format == 1) { + n = tor_sscanf(line, "%40s %ld %lf S=%10s %8s", + hexbuf, &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11); + if (n != 3 && n != 5) { + log_warn(LD_HIST, "Couldn't scan line %s", escaped(line)); + continue; + } + have_mtbf = 1; + } else { + // format == 2. + int mtbf_idx, wfu_idx; + if (strcmpstart(line, "R ") || strlen(line) < 2+HEX_DIGEST_LEN) + continue; + strlcpy(hexbuf, line+2, sizeof(hexbuf)); + mtbf_idx = find_next_with(lines, i+1, "+MTBF "); + wfu_idx = find_next_with(lines, i+1, "+WFU "); + if (mtbf_idx >= 0) { + const char *mtbfline = smartlist_get(lines, mtbf_idx); + n = tor_sscanf(mtbfline, "+MTBF %lu %lf S=%10s %8s", + &wrl, &trw, mtbf_timebuf, mtbf_timebuf+11); + if (n == 2 || n == 4) { + have_mtbf = 1; + } else { + log_warn(LD_HIST, "Couldn't scan +MTBF line %s", + escaped(mtbfline)); + } + } + if (wfu_idx >= 0) { + const char *wfuline = smartlist_get(lines, wfu_idx); + n = tor_sscanf(wfuline, "+WFU %lu %lu S=%10s %8s", + &wt_uptime, &total_wt_time, + wfu_timebuf, wfu_timebuf+11); + if (n == 2 || n == 4) { + have_wfu = 1; + } else { + log_warn(LD_HIST, "Couldn't scan +WFU line %s", escaped(wfuline)); + } + } + if (wfu_idx > i) + i = wfu_idx; + if (mtbf_idx > i) + i = mtbf_idx; + } + if (base16_decode(digest, DIGEST_LEN, + hexbuf, HEX_DIGEST_LEN) != DIGEST_LEN) { + log_warn(LD_HIST, "Couldn't hex string %s", escaped(hexbuf)); + continue; + } + hist = get_or_history(digest); + if (!hist) + continue; + + if (have_mtbf) { + if (mtbf_timebuf[0]) { + mtbf_timebuf[10] = ' '; + if (parse_possibly_bad_iso_time(mtbf_timebuf, &start_of_run)<0) + log_warn(LD_HIST, "Couldn't parse time %s", + escaped(mtbf_timebuf)); + } + hist->start_of_run = correct_time(start_of_run, now, stored_at, + tracked_since); + if (hist->start_of_run < latest_possible_start + wrl) + latest_possible_start = (time_t)(hist->start_of_run - wrl); + + hist->weighted_run_length = wrl; + hist->total_run_weights = trw; + } + if (have_wfu) { + if (wfu_timebuf[0]) { + wfu_timebuf[10] = ' '; + if (parse_possibly_bad_iso_time(wfu_timebuf, &start_of_downtime)<0) + log_warn(LD_HIST, "Couldn't parse time %s", escaped(wfu_timebuf)); + } + } + hist->start_of_downtime = correct_time(start_of_downtime, now, stored_at, + tracked_since); + hist->weighted_uptime = wt_uptime; + hist->total_weighted_time = total_wt_time; + } + if (strcmp(line, ".")) + log_warn(LD_HIST, "Truncated MTBF file."); + + if (tracked_since < 86400*365) /* Recover from insanely early value. */ + tracked_since = latest_possible_start; + + stability_last_downrated = last_downrated; + started_tracking_stability = tracked_since; + + goto done; + err: + r = -1; + done: + SMARTLIST_FOREACH(lines, char *, cp, tor_free(cp)); + smartlist_free(lines); + return r; +} + +/** For how many seconds do we keep track of individual per-second bandwidth + * totals? */ +#define NUM_SECS_ROLLING_MEASURE 10 +/** How large are the intervals for which we track and report bandwidth use? */ +#define NUM_SECS_BW_SUM_INTERVAL (24*60*60) +/** How far in the past do we remember and publish bandwidth use? */ +#define NUM_SECS_BW_SUM_IS_VALID (5*24*60*60) +/** How many bandwidth usage intervals do we remember? (derived) */ +#define NUM_TOTALS (NUM_SECS_BW_SUM_IS_VALID/NUM_SECS_BW_SUM_INTERVAL) + +/** Structure to track bandwidth use, and remember the maxima for a given + * time period. + */ +typedef struct bw_array_t { + /** Observation array: Total number of bytes transferred in each of the last + * NUM_SECS_ROLLING_MEASURE seconds. This is used as a circular array. */ + uint64_t obs[NUM_SECS_ROLLING_MEASURE]; + int cur_obs_idx; /**< Current position in obs. */ + time_t cur_obs_time; /**< Time represented in obs[cur_obs_idx] */ + uint64_t total_obs; /**< Total for all members of obs except + * obs[cur_obs_idx] */ + uint64_t max_total; /**< Largest value that total_obs has taken on in the + * current period. */ + uint64_t total_in_period; /**< Total bytes transferred in the current + * period. */ + + /** When does the next period begin? */ + time_t next_period; + /** Where in 'maxima' should the maximum bandwidth usage for the current + * period be stored? */ + int next_max_idx; + /** How many values in maxima/totals have been set ever? */ + int num_maxes_set; + /** Circular array of the maximum + * bandwidth-per-NUM_SECS_ROLLING_MEASURE usage for the last + * NUM_TOTALS periods */ + uint64_t maxima[NUM_TOTALS]; + /** Circular array of the total bandwidth usage for the last NUM_TOTALS + * periods */ + uint64_t totals[NUM_TOTALS]; +} bw_array_t; + +/** Shift the current period of b forward by one. */ +static void +commit_max(bw_array_t *b) +{ + /* Store total from current period. */ + b->totals[b->next_max_idx] = b->total_in_period; + /* Store maximum from current period. */ + b->maxima[b->next_max_idx++] = b->max_total; + /* Advance next_period and next_max_idx */ + b->next_period += NUM_SECS_BW_SUM_INTERVAL; + if (b->next_max_idx == NUM_TOTALS) + b->next_max_idx = 0; + if (b->num_maxes_set < NUM_TOTALS) + ++b->num_maxes_set; + /* Reset max_total. */ + b->max_total = 0; + /* Reset total_in_period. */ + b->total_in_period = 0; +} + +/** Shift the current observation time of <b>b</b> forward by one second. */ +static inline void +advance_obs(bw_array_t *b) +{ + int nextidx; + uint64_t total; + + /* Calculate the total bandwidth for the last NUM_SECS_ROLLING_MEASURE + * seconds; adjust max_total as needed.*/ + total = b->total_obs + b->obs[b->cur_obs_idx]; + if (total > b->max_total) + b->max_total = total; + + nextidx = b->cur_obs_idx+1; + if (nextidx == NUM_SECS_ROLLING_MEASURE) + nextidx = 0; + + b->total_obs = total - b->obs[nextidx]; + b->obs[nextidx]=0; + b->cur_obs_idx = nextidx; + + if (++b->cur_obs_time >= b->next_period) + commit_max(b); +} + +/** Add <b>n</b> bytes to the number of bytes in <b>b</b> for second + * <b>when</b>. */ +static inline void +add_obs(bw_array_t *b, time_t when, uint64_t n) +{ + if (when < b->cur_obs_time) + return; /* Don't record data in the past. */ + + /* If we're currently adding observations for an earlier second than + * 'when', advance b->cur_obs_time and b->cur_obs_idx by an + * appropriate number of seconds, and do all the other housekeeping. */ + while (when > b->cur_obs_time) { + /* Doing this one second at a time is potentially inefficient, if we start + with a state file that is very old. Fortunately, it doesn't seem to + show up in profiles, so we can just ignore it for now. */ + advance_obs(b); + } + + b->obs[b->cur_obs_idx] += n; + b->total_in_period += n; +} + +/** Allocate, initialize, and return a new bw_array. */ +static bw_array_t * +bw_array_new(void) +{ + bw_array_t *b; + time_t start; + b = tor_malloc_zero(sizeof(bw_array_t)); + rephist_total_alloc += sizeof(bw_array_t); + start = time(NULL); + b->cur_obs_time = start; + b->next_period = start + NUM_SECS_BW_SUM_INTERVAL; + return b; +} + +#define bw_array_free(val) \ + FREE_AND_NULL(bw_array_t, bw_array_free_, (val)) + +/** Free storage held by bandwidth array <b>b</b>. */ +static void +bw_array_free_(bw_array_t *b) +{ + if (!b) { + return; + } + + rephist_total_alloc -= sizeof(bw_array_t); + tor_free(b); +} + +/** Recent history of bandwidth observations for read operations. */ +static bw_array_t *read_array = NULL; +/** Recent history of bandwidth observations for write operations. */ +static bw_array_t *write_array = NULL; +/** Recent history of bandwidth observations for read operations for the + directory protocol. */ +static bw_array_t *dir_read_array = NULL; +/** Recent history of bandwidth observations for write operations for the + directory protocol. */ +static bw_array_t *dir_write_array = NULL; + +/** Set up [dir-]read_array and [dir-]write_array, freeing them if they + * already exist. */ +static void +bw_arrays_init(void) +{ + bw_array_free(read_array); + bw_array_free(write_array); + bw_array_free(dir_read_array); + bw_array_free(dir_write_array); + + read_array = bw_array_new(); + write_array = bw_array_new(); + dir_read_array = bw_array_new(); + dir_write_array = bw_array_new(); +} + +/** Remember that we read <b>num_bytes</b> bytes in second <b>when</b>. + * + * Add num_bytes to the current running total for <b>when</b>. + * + * <b>when</b> can go back to time, but it's safe to ignore calls + * earlier than the latest <b>when</b> you've heard of. + */ +void +rep_hist_note_bytes_written(size_t num_bytes, time_t when) +{ +/* Maybe a circular array for recent seconds, and step to a new point + * every time a new second shows up. Or simpler is to just to have + * a normal array and push down each item every second; it's short. + */ +/* When a new second has rolled over, compute the sum of the bytes we've + * seen over when-1 to when-1-NUM_SECS_ROLLING_MEASURE, and stick it + * somewhere. See rep_hist_bandwidth_assess() below. + */ + add_obs(write_array, when, num_bytes); +} + +/** Remember that we wrote <b>num_bytes</b> bytes in second <b>when</b>. + * (like rep_hist_note_bytes_written() above) + */ +void +rep_hist_note_bytes_read(size_t num_bytes, time_t when) +{ +/* if we're smart, we can make this func and the one above share code */ + add_obs(read_array, when, num_bytes); +} + +/** Remember that we wrote <b>num_bytes</b> directory bytes in second + * <b>when</b>. (like rep_hist_note_bytes_written() above) + */ +void +rep_hist_note_dir_bytes_written(size_t num_bytes, time_t when) +{ + add_obs(dir_write_array, when, num_bytes); +} + +/** Remember that we read <b>num_bytes</b> directory bytes in second + * <b>when</b>. (like rep_hist_note_bytes_written() above) + */ +void +rep_hist_note_dir_bytes_read(size_t num_bytes, time_t when) +{ + add_obs(dir_read_array, when, num_bytes); +} + +/** Helper: Return the largest value in b->maxima. (This is equal to the + * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last + * NUM_SECS_BW_SUM_IS_VALID seconds.) + */ +static uint64_t +find_largest_max(bw_array_t *b) +{ + int i; + uint64_t max; + max=0; + for (i=0; i<NUM_TOTALS; ++i) { + if (b->maxima[i]>max) + max = b->maxima[i]; + } + return max; +} + +/** Find the largest sums in the past NUM_SECS_BW_SUM_IS_VALID (roughly) + * seconds. Find one sum for reading and one for writing. They don't have + * to be at the same time. + * + * Return the smaller of these sums, divided by NUM_SECS_ROLLING_MEASURE. + */ +int +rep_hist_bandwidth_assess(void) +{ + uint64_t w,r; + r = find_largest_max(read_array); + w = find_largest_max(write_array); + if (r>w) + return (int)(((double)w)/NUM_SECS_ROLLING_MEASURE); + else + return (int)(((double)r)/NUM_SECS_ROLLING_MEASURE); +} + +/** Print the bandwidth history of b (either [dir-]read_array or + * [dir-]write_array) into the buffer pointed to by buf. The format is + * simply comma separated numbers, from oldest to newest. + * + * It returns the number of bytes written. + */ +static size_t +rep_hist_fill_bandwidth_history(char *buf, size_t len, const bw_array_t *b) +{ + char *cp = buf; + int i, n; + const or_options_t *options = get_options(); + uint64_t cutoff; + + if (b->num_maxes_set <= b->next_max_idx) { + /* We haven't been through the circular array yet; time starts at i=0.*/ + i = 0; + } else { + /* We've been around the array at least once. The next i to be + overwritten is the oldest. */ + i = b->next_max_idx; + } + + if (options->RelayBandwidthRate) { + /* We don't want to report that we used more bandwidth than the max we're + * willing to relay; otherwise everybody will know how much traffic + * we used ourself. */ + cutoff = options->RelayBandwidthRate * NUM_SECS_BW_SUM_INTERVAL; + } else { + cutoff = UINT64_MAX; + } + + for (n=0; n<b->num_maxes_set; ++n,++i) { + uint64_t total; + if (i >= NUM_TOTALS) + i -= NUM_TOTALS; + tor_assert(i < NUM_TOTALS); + /* Round the bandwidth used down to the nearest 1k. */ + total = b->totals[i] & ~0x3ff; + if (total > cutoff) + total = cutoff; + + if (n==(b->num_maxes_set-1)) + tor_snprintf(cp, len-(cp-buf), "%"PRIu64, (total)); + else + tor_snprintf(cp, len-(cp-buf), "%"PRIu64",", (total)); + cp += strlen(cp); + } + return cp-buf; +} + +/** Allocate and return lines for representing this server's bandwidth + * history in its descriptor. We publish these lines in our extra-info + * descriptor. + */ +char * +rep_hist_get_bandwidth_lines(void) +{ + char *buf, *cp; + char t[ISO_TIME_LEN+1]; + int r; + bw_array_t *b = NULL; + const char *desc = NULL; + size_t len; + + /* [dirreq-](read|write)-history yyyy-mm-dd HH:MM:SS (n s) n,n,n... */ +/* The n,n,n part above. Largest representation of a uint64_t is 20 chars + * long, plus the comma. */ +#define MAX_HIST_VALUE_LEN (21*NUM_TOTALS) + len = (67+MAX_HIST_VALUE_LEN)*4; + buf = tor_malloc_zero(len); + cp = buf; + for (r=0;r<4;++r) { + char tmp[MAX_HIST_VALUE_LEN]; + size_t slen; + switch (r) { + case 0: + b = write_array; + desc = "write-history"; + break; + case 1: + b = read_array; + desc = "read-history"; + break; + case 2: + b = dir_write_array; + desc = "dirreq-write-history"; + break; + case 3: + b = dir_read_array; + desc = "dirreq-read-history"; + break; + } + tor_assert(b); + slen = rep_hist_fill_bandwidth_history(tmp, MAX_HIST_VALUE_LEN, b); + /* If we don't have anything to write, skip to the next entry. */ + if (slen == 0) + continue; + format_iso_time(t, b->next_period-NUM_SECS_BW_SUM_INTERVAL); + tor_snprintf(cp, len-(cp-buf), "%s %s (%d s) ", + desc, t, NUM_SECS_BW_SUM_INTERVAL); + cp += strlen(cp); + strlcat(cp, tmp, len-(cp-buf)); + cp += slen; + strlcat(cp, "\n", len-(cp-buf)); + ++cp; + } + return buf; +} + +/** Write a single bw_array_t into the Values, Ends, Interval, and Maximum + * entries of an or_state_t. Done before writing out a new state file. */ +static void +rep_hist_update_bwhist_state_section(or_state_t *state, + const bw_array_t *b, + smartlist_t **s_values, + smartlist_t **s_maxima, + time_t *s_begins, + int *s_interval) +{ + int i,j; + uint64_t maxval; + + if (*s_values) { + SMARTLIST_FOREACH(*s_values, char *, val, tor_free(val)); + smartlist_free(*s_values); + } + if (*s_maxima) { + SMARTLIST_FOREACH(*s_maxima, char *, val, tor_free(val)); + smartlist_free(*s_maxima); + } + if (! server_mode(get_options())) { + /* Clients don't need to store bandwidth history persistently; + * force these values to the defaults. */ + /* FFFF we should pull the default out of config.c's state table, + * so we don't have two defaults. */ + if (*s_begins != 0 || *s_interval != 900) { + time_t now = time(NULL); + time_t save_at = get_options()->AvoidDiskWrites ? now+3600 : now+600; + or_state_mark_dirty(state, save_at); + } + *s_begins = 0; + *s_interval = 900; + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); + return; + } + *s_begins = b->next_period; + *s_interval = NUM_SECS_BW_SUM_INTERVAL; + + *s_values = smartlist_new(); + *s_maxima = smartlist_new(); + /* Set i to first position in circular array */ + i = (b->num_maxes_set <= b->next_max_idx) ? 0 : b->next_max_idx; + for (j=0; j < b->num_maxes_set; ++j,++i) { + if (i >= NUM_TOTALS) + i = 0; + smartlist_add_asprintf(*s_values, "%"PRIu64, + (b->totals[i] & ~0x3ff)); + maxval = b->maxima[i] / NUM_SECS_ROLLING_MEASURE; + smartlist_add_asprintf(*s_maxima, "%"PRIu64, + (maxval & ~0x3ff)); + } + smartlist_add_asprintf(*s_values, "%"PRIu64, + (b->total_in_period & ~0x3ff)); + maxval = b->max_total / NUM_SECS_ROLLING_MEASURE; + smartlist_add_asprintf(*s_maxima, "%"PRIu64, + (maxval & ~0x3ff)); +} + +/** Update <b>state</b> with the newest bandwidth history. Done before + * writing out a new state file. */ +void +rep_hist_update_state(or_state_t *state) +{ +#define UPDATE(arrname,st) \ + rep_hist_update_bwhist_state_section(state,\ + (arrname),\ + &state->BWHistory ## st ## Values, \ + &state->BWHistory ## st ## Maxima, \ + &state->BWHistory ## st ## Ends, \ + &state->BWHistory ## st ## Interval) + + UPDATE(write_array, Write); + UPDATE(read_array, Read); + UPDATE(dir_write_array, DirWrite); + UPDATE(dir_read_array, DirRead); + + if (server_mode(get_options())) { + or_state_mark_dirty(state, time(NULL)+(2*3600)); + } +#undef UPDATE +} + +/** Load a single bw_array_t from its Values, Ends, Maxima, and Interval + * entries in an or_state_t. Done while reading the state file. */ +static int +rep_hist_load_bwhist_state_section(bw_array_t *b, + const smartlist_t *s_values, + const smartlist_t *s_maxima, + const time_t s_begins, + const int s_interval) +{ + time_t now = time(NULL); + int retval = 0; + time_t start; + + uint64_t v, mv; + int i,ok,ok_m = 0; + int have_maxima = s_maxima && s_values && + (smartlist_len(s_values) == smartlist_len(s_maxima)); + + if (s_values && s_begins >= now - NUM_SECS_BW_SUM_INTERVAL*NUM_TOTALS) { + start = s_begins - s_interval*(smartlist_len(s_values)); + if (start > now) + return 0; + b->cur_obs_time = start; + b->next_period = start + NUM_SECS_BW_SUM_INTERVAL; + SMARTLIST_FOREACH_BEGIN(s_values, const char *, cp) { + const char *maxstr = NULL; + v = tor_parse_uint64(cp, 10, 0, UINT64_MAX, &ok, NULL); + if (have_maxima) { + maxstr = smartlist_get(s_maxima, cp_sl_idx); + mv = tor_parse_uint64(maxstr, 10, 0, UINT64_MAX, &ok_m, NULL); + mv *= NUM_SECS_ROLLING_MEASURE; + } else { + /* No maxima known; guess average rate to be conservative. */ + mv = (v / s_interval) * NUM_SECS_ROLLING_MEASURE; + } + if (!ok) { + retval = -1; + log_notice(LD_HIST, "Could not parse value '%s' into a number.'",cp); + } + if (maxstr && !ok_m) { + retval = -1; + log_notice(LD_HIST, "Could not parse maximum '%s' into a number.'", + maxstr); + } + + if (start < now) { + time_t cur_start = start; + time_t actual_interval_len = s_interval; + uint64_t cur_val = 0; + /* Calculate the average per second. This is the best we can do + * because our state file doesn't have per-second resolution. */ + if (start + s_interval > now) + actual_interval_len = now - start; + cur_val = v / actual_interval_len; + /* This is potentially inefficient, but since we don't do it very + * often it should be ok. */ + while (cur_start < start + actual_interval_len) { + add_obs(b, cur_start, cur_val); + ++cur_start; + } + b->max_total = mv; + /* This will result in some fairly choppy history if s_interval + * is not the same as NUM_SECS_BW_SUM_INTERVAL. XXXX */ + start += actual_interval_len; + } + } SMARTLIST_FOREACH_END(cp); + } + + /* Clean up maxima and observed */ + for (i=0; i<NUM_SECS_ROLLING_MEASURE; ++i) { + b->obs[i] = 0; + } + b->total_obs = 0; + + return retval; +} + +/** Set bandwidth history from the state file we just loaded. */ +int +rep_hist_load_state(or_state_t *state, char **err) +{ + int all_ok = 1; + + /* Assert they already have been malloced */ + tor_assert(read_array && write_array); + tor_assert(dir_read_array && dir_write_array); + +#define LOAD(arrname,st) \ + if (rep_hist_load_bwhist_state_section( \ + (arrname), \ + state->BWHistory ## st ## Values, \ + state->BWHistory ## st ## Maxima, \ + state->BWHistory ## st ## Ends, \ + state->BWHistory ## st ## Interval)<0) \ + all_ok = 0 + + LOAD(write_array, Write); + LOAD(read_array, Read); + LOAD(dir_write_array, DirWrite); + LOAD(dir_read_array, DirRead); + +#undef LOAD + if (!all_ok) { + *err = tor_strdup("Parsing of bandwidth history values failed"); + /* and create fresh arrays */ + bw_arrays_init(); + return -1; + } + return 0; +} + +/*********************************************************************/ + +/** A single predicted port: used to remember which ports we've made + * connections to, so that we can try to keep making circuits that can handle + * those ports. */ +typedef struct predicted_port_t { + /** The port we connected to */ + uint16_t port; + /** The time at which we last used it */ + time_t time; +} predicted_port_t; + +/** A list of port numbers that have been used recently. */ +static smartlist_t *predicted_ports_list=NULL; +/** How long do we keep predicting circuits? */ +static int prediction_timeout=0; +/** When was the last time we added a prediction entry (HS or port) */ +static time_t last_prediction_add_time=0; + +/** + * How much time left until we stop predicting circuits? + */ +int +predicted_ports_prediction_time_remaining(time_t now) +{ + time_t idle_delta; + + /* Protect against overflow of return value. This can happen if the clock + * jumps backwards in time. Update the last prediction time (aka last + * active time) to prevent it. This update is preferable to using monotonic + * time because it prevents clock jumps into the past from simply causing + * very long idle timeouts while the monotonic time stands still. */ + if (last_prediction_add_time > now) { + last_prediction_add_time = now; + idle_delta = 0; + } else { + idle_delta = now - last_prediction_add_time; + } + + /* Protect against underflow of the return value. This can happen for very + * large periods of inactivity/system sleep. */ + if (idle_delta > prediction_timeout) + return 0; + + if (BUG((prediction_timeout - idle_delta) > INT_MAX)) { + return INT_MAX; + } + + return (int)(prediction_timeout - idle_delta); +} + +/** We just got an application request for a connection with + * port <b>port</b>. Remember it for the future, so we can keep + * some circuits open that will exit to this port. + */ +static void +add_predicted_port(time_t now, uint16_t port) +{ + predicted_port_t *pp = tor_malloc(sizeof(predicted_port_t)); + + // If the list is empty, re-randomize predicted ports lifetime + if (!any_predicted_circuits(now)) { + prediction_timeout = channelpadding_get_circuits_available_timeout(); + } + + last_prediction_add_time = now; + + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ building " + "for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + + pp->port = port; + pp->time = now; + rephist_total_alloc += sizeof(*pp); + smartlist_add(predicted_ports_list, pp); +} + +/** + * Allocate whatever memory and structs are needed for predicting + * which ports will be used. Also seed it with port 80, so we'll build + * circuits on start-up. + */ +static void +predicted_ports_alloc(void) +{ + predicted_ports_list = smartlist_new(); +} + +void +predicted_ports_init(void) +{ + add_predicted_port(time(NULL), 443); // Add a port to get us started +} + +/** Free whatever memory is needed for predicting which ports will + * be used. + */ +static void +predicted_ports_free_all(void) +{ + rephist_total_alloc -= + smartlist_len(predicted_ports_list)*sizeof(predicted_port_t); + SMARTLIST_FOREACH(predicted_ports_list, predicted_port_t *, + pp, tor_free(pp)); + smartlist_free(predicted_ports_list); +} + +/** Remember that <b>port</b> has been asked for as of time <b>now</b>. + * This is used for predicting what sorts of streams we'll make in the + * future and making exit circuits to anticipate that. + */ +void +rep_hist_note_used_port(time_t now, uint16_t port) +{ + tor_assert(predicted_ports_list); + + if (!port) /* record nothing */ + return; + + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->port == port) { + pp->time = now; + + last_prediction_add_time = now; + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ " + "building for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + return; + } + } SMARTLIST_FOREACH_END(pp); + /* it's not there yet; we need to add it */ + add_predicted_port(now, port); +} + +/** Return a newly allocated pointer to a list of uint16_t * for ports that + * are likely to be asked for in the near future. + */ +smartlist_t * +rep_hist_get_predicted_ports(time_t now) +{ + int predicted_circs_relevance_time; + smartlist_t *out = smartlist_new(); + tor_assert(predicted_ports_list); + + predicted_circs_relevance_time = prediction_timeout; + + /* clean out obsolete entries */ + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (pp->time + predicted_circs_relevance_time < now) { + log_debug(LD_CIRC, "Expiring predicted port %d", pp->port); + + rephist_total_alloc -= sizeof(predicted_port_t); + tor_free(pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } else { + smartlist_add(out, tor_memdup(&pp->port, sizeof(uint16_t))); + } + } SMARTLIST_FOREACH_END(pp); + return out; +} + +/** + * Take a list of uint16_t *, and remove every port in the list from the + * current list of predicted ports. + */ +void +rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports) +{ + /* Let's do this on O(N), not O(N^2). */ + bitarray_t *remove_ports = bitarray_init_zero(UINT16_MAX); + SMARTLIST_FOREACH(rmv_ports, const uint16_t *, p, + bitarray_set(remove_ports, *p)); + SMARTLIST_FOREACH_BEGIN(predicted_ports_list, predicted_port_t *, pp) { + if (bitarray_is_set(remove_ports, pp->port)) { + tor_free(pp); + rephist_total_alloc -= sizeof(*pp); + SMARTLIST_DEL_CURRENT(predicted_ports_list, pp); + } + } SMARTLIST_FOREACH_END(pp); + bitarray_free(remove_ports); +} + +/** The user asked us to do a resolve. Rather than keeping track of + * timings and such of resolves, we fake it for now by treating + * it the same way as a connection to port 80. This way we will continue + * to have circuits lying around if the user only uses Tor for resolves. + */ +void +rep_hist_note_used_resolve(time_t now) +{ + rep_hist_note_used_port(now, 80); +} + +/** The last time at which we needed an internal circ. */ +static time_t predicted_internal_time = 0; +/** The last time we needed an internal circ with good uptime. */ +static time_t predicted_internal_uptime_time = 0; +/** The last time we needed an internal circ with good capacity. */ +static time_t predicted_internal_capacity_time = 0; + +/** Remember that we used an internal circ at time <b>now</b>. */ +void +rep_hist_note_used_internal(time_t now, int need_uptime, int need_capacity) +{ + // If the list is empty, re-randomize predicted ports lifetime + if (!any_predicted_circuits(now)) { + prediction_timeout = channelpadding_get_circuits_available_timeout(); + } + + last_prediction_add_time = now; + + log_info(LD_CIRC, + "New port prediction added. Will continue predictive circ building " + "for %d more seconds.", + predicted_ports_prediction_time_remaining(now)); + + predicted_internal_time = now; + if (need_uptime) + predicted_internal_uptime_time = now; + if (need_capacity) + predicted_internal_capacity_time = now; +} + +/** Return 1 if we've used an internal circ recently; else return 0. */ +int +rep_hist_get_predicted_internal(time_t now, int *need_uptime, + int *need_capacity) +{ + int predicted_circs_relevance_time; + + predicted_circs_relevance_time = prediction_timeout; + + if (!predicted_internal_time) { /* initialize it */ + predicted_internal_time = now; + predicted_internal_uptime_time = now; + predicted_internal_capacity_time = now; + } + if (predicted_internal_time + predicted_circs_relevance_time < now) + return 0; /* too long ago */ + if (predicted_internal_uptime_time + predicted_circs_relevance_time >= now) + *need_uptime = 1; + // Always predict that we need capacity. + *need_capacity = 1; + return 1; +} + +/** Any ports used lately? These are pre-seeded if we just started + * up or if we're running a hidden service. */ +int +any_predicted_circuits(time_t now) +{ + int predicted_circs_relevance_time; + predicted_circs_relevance_time = prediction_timeout; + + return smartlist_len(predicted_ports_list) || + predicted_internal_time + predicted_circs_relevance_time >= now; +} + +/** Return 1 if we have no need for circuits currently, else return 0. */ +int +rep_hist_circbuilding_dormant(time_t now) +{ + const or_options_t *options = get_options(); + + if (any_predicted_circuits(now)) + return 0; + + /* see if we'll still need to build testing circuits */ + if (server_mode(options) && + (!check_whether_orport_reachable(options) || + !circuit_enough_testing_circs())) + return 0; + if (!check_whether_dirport_reachable(options)) + return 0; + + return 1; +} + +/*** Exit port statistics ***/ + +/* Some constants */ +/** To what multiple should byte numbers be rounded up? */ +#define EXIT_STATS_ROUND_UP_BYTES 1024 +/** To what multiple should stream counts be rounded up? */ +#define EXIT_STATS_ROUND_UP_STREAMS 4 +/** Number of TCP ports */ +#define EXIT_STATS_NUM_PORTS 65536 +/** Top n ports that will be included in exit stats. */ +#define EXIT_STATS_TOP_N_PORTS 10 + +/* The following data structures are arrays and no fancy smartlists or maps, + * so that all write operations can be done in constant time. This comes at + * the price of some memory (1.25 MB) and linear complexity when writing + * stats for measuring relays. */ +/** Number of bytes read in current period by exit port */ +static uint64_t *exit_bytes_read = NULL; +/** Number of bytes written in current period by exit port */ +static uint64_t *exit_bytes_written = NULL; +/** Number of streams opened in current period by exit port */ +static uint32_t *exit_streams = NULL; + +/** Start time of exit stats or 0 if we're not collecting exit stats. */ +static time_t start_of_exit_stats_interval; + +/** Initialize exit port stats. */ +void +rep_hist_exit_stats_init(time_t now) +{ + start_of_exit_stats_interval = now; + exit_bytes_read = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t)); + exit_bytes_written = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint64_t)); + exit_streams = tor_calloc(EXIT_STATS_NUM_PORTS, sizeof(uint32_t)); +} + +/** Reset counters for exit port statistics. */ +void +rep_hist_reset_exit_stats(time_t now) +{ + start_of_exit_stats_interval = now; + memset(exit_bytes_read, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t)); + memset(exit_bytes_written, 0, EXIT_STATS_NUM_PORTS * sizeof(uint64_t)); + memset(exit_streams, 0, EXIT_STATS_NUM_PORTS * sizeof(uint32_t)); +} + +/** Stop collecting exit port stats in a way that we can re-start doing + * so in rep_hist_exit_stats_init(). */ +void +rep_hist_exit_stats_term(void) +{ + start_of_exit_stats_interval = 0; + tor_free(exit_bytes_read); + tor_free(exit_bytes_written); + tor_free(exit_streams); +} + +/** Helper for qsort: compare two ints. Does not handle overflow properly, + * but works fine for sorting an array of port numbers, which is what we use + * it for. */ +static int +compare_int_(const void *x, const void *y) +{ + return (*(int*)x - *(int*)y); +} + +/** Return a newly allocated string containing the exit port statistics + * until <b>now</b>, or NULL if we're not collecting exit stats. Caller + * must ensure start_of_exit_stats_interval is in the past. */ +char * +rep_hist_format_exit_stats(time_t now) +{ + int i, j, top_elements = 0, cur_min_idx = 0, cur_port; + uint64_t top_bytes[EXIT_STATS_TOP_N_PORTS]; + int top_ports[EXIT_STATS_TOP_N_PORTS]; + uint64_t cur_bytes = 0, other_read = 0, other_written = 0, + total_read = 0, total_written = 0; + uint32_t total_streams = 0, other_streams = 0; + smartlist_t *written_strings, *read_strings, *streams_strings; + char *written_string, *read_string, *streams_string; + char t[ISO_TIME_LEN+1]; + char *result; + + if (!start_of_exit_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_exit_stats_interval); + + /* Go through all ports to find the n ports that saw most written and + * read bytes. + * + * Invariant: at the end of the loop for iteration i, + * total_read is the sum of all exit_bytes_read[0..i] + * total_written is the sum of all exit_bytes_written[0..i] + * total_stream is the sum of all exit_streams[0..i] + * + * top_elements = MAX(EXIT_STATS_TOP_N_PORTS, + * #{j | 0 <= j <= i && volume(i) > 0}) + * + * For all 0 <= j < top_elements, + * top_bytes[j] > 0 + * 0 <= top_ports[j] <= 65535 + * top_bytes[j] = volume(top_ports[j]) + * + * There is no j in 0..i and k in 0..top_elements such that: + * volume(j) > top_bytes[k] AND j is not in top_ports[0..top_elements] + * + * There is no j!=cur_min_idx in 0..top_elements such that: + * top_bytes[j] < top_bytes[cur_min_idx] + * + * where volume(x) == exit_bytes_read[x]+exit_bytes_written[x] + * + * Worst case: O(EXIT_STATS_NUM_PORTS * EXIT_STATS_TOP_N_PORTS) + */ + for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) { + total_read += exit_bytes_read[i]; + total_written += exit_bytes_written[i]; + total_streams += exit_streams[i]; + cur_bytes = exit_bytes_read[i] + exit_bytes_written[i]; + if (cur_bytes == 0) { + continue; + } + if (top_elements < EXIT_STATS_TOP_N_PORTS) { + top_bytes[top_elements] = cur_bytes; + top_ports[top_elements++] = i; + } else if (cur_bytes > top_bytes[cur_min_idx]) { + top_bytes[cur_min_idx] = cur_bytes; + top_ports[cur_min_idx] = i; + } else { + continue; + } + cur_min_idx = 0; + for (j = 1; j < top_elements; j++) { + if (top_bytes[j] < top_bytes[cur_min_idx]) { + cur_min_idx = j; + } + } + } + + /* Add observations of top ports to smartlists. */ + written_strings = smartlist_new(); + read_strings = smartlist_new(); + streams_strings = smartlist_new(); + other_read = total_read; + other_written = total_written; + other_streams = total_streams; + /* Sort the ports; this puts them out of sync with top_bytes, but we + * won't be using top_bytes again anyway */ + qsort(top_ports, top_elements, sizeof(int), compare_int_); + for (j = 0; j < top_elements; j++) { + cur_port = top_ports[j]; + if (exit_bytes_written[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_written[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + smartlist_add_asprintf(written_strings, "%d=%"PRIu64, + cur_port, (num)); + other_written -= exit_bytes_written[cur_port]; + } + if (exit_bytes_read[cur_port] > 0) { + uint64_t num = round_uint64_to_next_multiple_of( + exit_bytes_read[cur_port], + EXIT_STATS_ROUND_UP_BYTES); + num /= 1024; + smartlist_add_asprintf(read_strings, "%d=%"PRIu64, + cur_port, (num)); + other_read -= exit_bytes_read[cur_port]; + } + if (exit_streams[cur_port] > 0) { + uint32_t num = round_uint32_to_next_multiple_of( + exit_streams[cur_port], + EXIT_STATS_ROUND_UP_STREAMS); + smartlist_add_asprintf(streams_strings, "%d=%u", cur_port, num); + other_streams -= exit_streams[cur_port]; + } + } + + /* Add observations of other ports in a single element. */ + other_written = round_uint64_to_next_multiple_of(other_written, + EXIT_STATS_ROUND_UP_BYTES); + other_written /= 1024; + smartlist_add_asprintf(written_strings, "other=%"PRIu64, + (other_written)); + other_read = round_uint64_to_next_multiple_of(other_read, + EXIT_STATS_ROUND_UP_BYTES); + other_read /= 1024; + smartlist_add_asprintf(read_strings, "other=%"PRIu64, + (other_read)); + other_streams = round_uint32_to_next_multiple_of(other_streams, + EXIT_STATS_ROUND_UP_STREAMS); + smartlist_add_asprintf(streams_strings, "other=%u", other_streams); + + /* Join all observations in single strings. */ + written_string = smartlist_join_strings(written_strings, ",", 0, NULL); + read_string = smartlist_join_strings(read_strings, ",", 0, NULL); + streams_string = smartlist_join_strings(streams_strings, ",", 0, NULL); + SMARTLIST_FOREACH(written_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(read_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(streams_strings, char *, cp, tor_free(cp)); + smartlist_free(written_strings); + smartlist_free(read_strings); + smartlist_free(streams_strings); + + /* Put everything together. */ + format_iso_time(t, now); + tor_asprintf(&result, "exit-stats-end %s (%d s)\n" + "exit-kibibytes-written %s\n" + "exit-kibibytes-read %s\n" + "exit-streams-opened %s\n", + t, (unsigned) (now - start_of_exit_stats_interval), + written_string, + read_string, + streams_string); + tor_free(written_string); + tor_free(read_string); + tor_free(streams_string); + return result; +} + +/** If 24 hours have passed since the beginning of the current exit port + * stats period, write exit stats to $DATADIR/stats/exit-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write exit stats or 0 if we never want to write. */ +time_t +rep_hist_exit_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_exit_stats_interval) + return 0; /* Not initialized. */ + if (start_of_exit_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write. */ + + log_info(LD_HIST, "Writing exit port statistics to disk."); + + /* Generate history string. */ + str = rep_hist_format_exit_stats(now); + + /* Reset counters. */ + rep_hist_reset_exit_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "exit-stats", str, "exit port statistics"); + } + + done: + tor_free(str); + return start_of_exit_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Note that we wrote <b>num_written</b> bytes and read <b>num_read</b> + * bytes to/from an exit connection to <b>port</b>. */ +void +rep_hist_note_exit_bytes(uint16_t port, size_t num_written, + size_t num_read) +{ + if (!start_of_exit_stats_interval) + return; /* Not initialized. */ + exit_bytes_written[port] += num_written; + exit_bytes_read[port] += num_read; + log_debug(LD_HIST, "Written %lu bytes and read %lu bytes to/from an " + "exit connection to port %d.", + (unsigned long)num_written, (unsigned long)num_read, port); +} + +/** Note that we opened an exit stream to <b>port</b>. */ +void +rep_hist_note_exit_stream_opened(uint16_t port) +{ + if (!start_of_exit_stats_interval) + return; /* Not initialized. */ + exit_streams[port]++; + log_debug(LD_HIST, "Opened exit stream to port %d", port); +} + +/*** cell statistics ***/ + +/** Start of the current buffer stats interval or 0 if we're not + * collecting buffer statistics. */ +static time_t start_of_buffer_stats_interval; + +/** Initialize buffer stats. */ +void +rep_hist_buffer_stats_init(time_t now) +{ + start_of_buffer_stats_interval = now; +} + +/** Statistics from a single circuit. Collected when the circuit closes, or + * when we flush statistics to disk. */ +typedef struct circ_buffer_stats_t { + /** Average number of cells in the circuit's queue */ + double mean_num_cells_in_queue; + /** Average time a cell waits in the queue. */ + double mean_time_cells_in_queue; + /** Total number of cells sent over this circuit */ + uint32_t processed_cells; +} circ_buffer_stats_t; + +/** List of circ_buffer_stats_t. */ +static smartlist_t *circuits_for_buffer_stats = NULL; + +/** Remember cell statistics <b>mean_num_cells_in_queue</b>, + * <b>mean_time_cells_in_queue</b>, and <b>processed_cells</b> of a + * circuit. */ +void +rep_hist_add_buffer_stats(double mean_num_cells_in_queue, + double mean_time_cells_in_queue, uint32_t processed_cells) +{ + circ_buffer_stats_t *stats; + if (!start_of_buffer_stats_interval) + return; /* Not initialized. */ + stats = tor_malloc_zero(sizeof(circ_buffer_stats_t)); + stats->mean_num_cells_in_queue = mean_num_cells_in_queue; + stats->mean_time_cells_in_queue = mean_time_cells_in_queue; + stats->processed_cells = processed_cells; + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + smartlist_add(circuits_for_buffer_stats, stats); +} + +/** Remember cell statistics for circuit <b>circ</b> at time + * <b>end_of_interval</b> and reset cell counters in case the circuit + * remains open in the next measurement interval. */ +void +rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval) +{ + time_t start_of_interval; + int interval_length; + or_circuit_t *orcirc; + double mean_num_cells_in_queue, mean_time_cells_in_queue; + uint32_t processed_cells; + if (CIRCUIT_IS_ORIGIN(circ)) + return; + orcirc = TO_OR_CIRCUIT(circ); + if (!orcirc->processed_cells) + return; + start_of_interval = (circ->timestamp_created.tv_sec > + start_of_buffer_stats_interval) ? + (time_t)circ->timestamp_created.tv_sec : + start_of_buffer_stats_interval; + interval_length = (int) (end_of_interval - start_of_interval); + if (interval_length <= 0) + return; + processed_cells = orcirc->processed_cells; + /* 1000.0 for s -> ms; 2.0 because of app-ward and exit-ward queues */ + mean_num_cells_in_queue = (double) orcirc->total_cell_waiting_time / + (double) interval_length / 1000.0 / 2.0; + mean_time_cells_in_queue = + (double) orcirc->total_cell_waiting_time / + (double) orcirc->processed_cells; + orcirc->total_cell_waiting_time = 0; + orcirc->processed_cells = 0; + rep_hist_add_buffer_stats(mean_num_cells_in_queue, + mean_time_cells_in_queue, + processed_cells); +} + +/** Sorting helper: return -1, 1, or 0 based on comparison of two + * circ_buffer_stats_t */ +static int +buffer_stats_compare_entries_(const void **_a, const void **_b) +{ + const circ_buffer_stats_t *a = *_a, *b = *_b; + if (a->processed_cells < b->processed_cells) + return 1; + else if (a->processed_cells > b->processed_cells) + return -1; + else + return 0; +} + +/** Stop collecting cell stats in a way that we can re-start doing so in + * rep_hist_buffer_stats_init(). */ +void +rep_hist_buffer_stats_term(void) +{ + rep_hist_reset_buffer_stats(0); +} + +/** Clear history of circuit statistics and set the measurement interval + * start to <b>now</b>. */ +void +rep_hist_reset_buffer_stats(time_t now) +{ + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, + stats, tor_free(stats)); + smartlist_clear(circuits_for_buffer_stats); + start_of_buffer_stats_interval = now; +} + +/** Return a newly allocated string containing the buffer statistics until + * <b>now</b>, or NULL if we're not collecting buffer stats. Caller must + * ensure start_of_buffer_stats_interval is in the past. */ +char * +rep_hist_format_buffer_stats(time_t now) +{ +#define SHARES 10 + uint64_t processed_cells[SHARES]; + uint32_t circs_in_share[SHARES]; + int number_of_circuits, i; + double queued_cells[SHARES], time_in_queue[SHARES]; + smartlist_t *processed_cells_strings, *queued_cells_strings, + *time_in_queue_strings; + char *processed_cells_string, *queued_cells_string, + *time_in_queue_string; + char t[ISO_TIME_LEN+1]; + char *result; + + if (!start_of_buffer_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_buffer_stats_interval); + + /* Calculate deciles if we saw at least one circuit. */ + memset(processed_cells, 0, SHARES * sizeof(uint64_t)); + memset(circs_in_share, 0, SHARES * sizeof(uint32_t)); + memset(queued_cells, 0, SHARES * sizeof(double)); + memset(time_in_queue, 0, SHARES * sizeof(double)); + if (!circuits_for_buffer_stats) + circuits_for_buffer_stats = smartlist_new(); + number_of_circuits = smartlist_len(circuits_for_buffer_stats); + if (number_of_circuits > 0) { + smartlist_sort(circuits_for_buffer_stats, + buffer_stats_compare_entries_); + i = 0; + SMARTLIST_FOREACH_BEGIN(circuits_for_buffer_stats, + circ_buffer_stats_t *, stats) + { + int share = i++ * SHARES / number_of_circuits; + processed_cells[share] += stats->processed_cells; + queued_cells[share] += stats->mean_num_cells_in_queue; + time_in_queue[share] += stats->mean_time_cells_in_queue; + circs_in_share[share]++; + } + SMARTLIST_FOREACH_END(stats); + } + + /* Write deciles to strings. */ + processed_cells_strings = smartlist_new(); + queued_cells_strings = smartlist_new(); + time_in_queue_strings = smartlist_new(); + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(processed_cells_strings, + "%"PRIu64, !circs_in_share[i] ? 0 : + (processed_cells[i] / + circs_in_share[i])); + } + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(queued_cells_strings, "%.2f", + circs_in_share[i] == 0 ? 0.0 : + queued_cells[i] / (double) circs_in_share[i]); + } + for (i = 0; i < SHARES; i++) { + smartlist_add_asprintf(time_in_queue_strings, "%.0f", + circs_in_share[i] == 0 ? 0.0 : + time_in_queue[i] / (double) circs_in_share[i]); + } + + /* Join all observations in single strings. */ + processed_cells_string = smartlist_join_strings(processed_cells_strings, + ",", 0, NULL); + queued_cells_string = smartlist_join_strings(queued_cells_strings, + ",", 0, NULL); + time_in_queue_string = smartlist_join_strings(time_in_queue_strings, + ",", 0, NULL); + SMARTLIST_FOREACH(processed_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(queued_cells_strings, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(time_in_queue_strings, char *, cp, tor_free(cp)); + smartlist_free(processed_cells_strings); + smartlist_free(queued_cells_strings); + smartlist_free(time_in_queue_strings); + + /* Put everything together. */ + format_iso_time(t, now); + tor_asprintf(&result, "cell-stats-end %s (%d s)\n" + "cell-processed-cells %s\n" + "cell-queued-cells %s\n" + "cell-time-in-queue %s\n" + "cell-circuits-per-decile %d\n", + t, (unsigned) (now - start_of_buffer_stats_interval), + processed_cells_string, + queued_cells_string, + time_in_queue_string, + CEIL_DIV(number_of_circuits, SHARES)); + tor_free(processed_cells_string); + tor_free(queued_cells_string); + tor_free(time_in_queue_string); + return result; +#undef SHARES +} + +/** If 24 hours have passed since the beginning of the current buffer + * stats period, write buffer stats to $DATADIR/stats/buffer-stats + * (possibly overwriting an existing file) and reset counters. Return + * when we would next want to write buffer stats or 0 if we never want to + * write. */ +time_t +rep_hist_buffer_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_buffer_stats_interval) + return 0; /* Not initialized. */ + if (start_of_buffer_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Add open circuits to the history. */ + SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, circ) { + rep_hist_buffer_stats_add_circ(circ, now); + } + SMARTLIST_FOREACH_END(circ); + + /* Generate history string. */ + str = rep_hist_format_buffer_stats(now); + + /* Reset both buffer history and counters of open circuits. */ + rep_hist_reset_buffer_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "buffer-stats", str, "buffer statistics"); + } + + done: + tor_free(str); + return start_of_buffer_stats_interval + WRITE_STATS_INTERVAL; +} + +/*** Descriptor serving statistics ***/ + +/** Digestmap to track which descriptors were downloaded this stats + * collection interval. It maps descriptor digest to pointers to 1, + * effectively turning this into a list. */ +static digestmap_t *served_descs = NULL; + +/** Number of how many descriptors were downloaded in total during this + * interval. */ +static unsigned long total_descriptor_downloads; + +/** Start time of served descs stats or 0 if we're not collecting those. */ +static time_t start_of_served_descs_stats_interval; + +/** Initialize descriptor stats. */ +void +rep_hist_desc_stats_init(time_t now) +{ + if (served_descs) { + log_warn(LD_BUG, "Called rep_hist_desc_stats_init() when desc stats were " + "already initialized. This is probably harmless."); + return; // Already initialized + } + served_descs = digestmap_new(); + total_descriptor_downloads = 0; + start_of_served_descs_stats_interval = now; +} + +/** Reset served descs stats to empty, starting a new interval <b>now</b>. */ +static void +rep_hist_reset_desc_stats(time_t now) +{ + rep_hist_desc_stats_term(); + rep_hist_desc_stats_init(now); +} + +/** Stop collecting served descs stats, so that rep_hist_desc_stats_init() is + * safe to be called again. */ +void +rep_hist_desc_stats_term(void) +{ + digestmap_free(served_descs, NULL); + served_descs = NULL; + start_of_served_descs_stats_interval = 0; + total_descriptor_downloads = 0; +} + +/** Helper for rep_hist_desc_stats_write(). Return a newly allocated string + * containing the served desc statistics until now, or NULL if we're not + * collecting served desc stats. Caller must ensure that now is not before + * start_of_served_descs_stats_interval. */ +static char * +rep_hist_format_desc_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *result; + + digestmap_iter_t *iter; + const char *key; + void *val; + unsigned size; + int *vals, max = 0, q3 = 0, md = 0, q1 = 0, min = 0; + int n = 0; + + if (!start_of_served_descs_stats_interval) + return NULL; + + size = digestmap_size(served_descs); + if (size > 0) { + vals = tor_calloc(size, sizeof(int)); + for (iter = digestmap_iter_init(served_descs); + !digestmap_iter_done(iter); + iter = digestmap_iter_next(served_descs, iter)) { + uintptr_t count; + digestmap_iter_get(iter, &key, &val); + count = (uintptr_t)val; + vals[n++] = (int)count; + (void)key; + } + max = find_nth_int(vals, size, size-1); + q3 = find_nth_int(vals, size, (3*size-1)/4); + md = find_nth_int(vals, size, (size-1)/2); + q1 = find_nth_int(vals, size, (size-1)/4); + min = find_nth_int(vals, size, 0); + tor_free(vals); + } + + format_iso_time(t, now); + + tor_asprintf(&result, + "served-descs-stats-end %s (%d s) total=%lu unique=%u " + "max=%d q3=%d md=%d q1=%d min=%d\n", + t, + (unsigned) (now - start_of_served_descs_stats_interval), + total_descriptor_downloads, + size, max, q3, md, q1, min); + + return result; +} + +/** If WRITE_STATS_INTERVAL seconds have passed since the beginning of + * the current served desc stats interval, write the stats to + * $DATADIR/stats/served-desc-stats (possibly appending to an existing file) + * and reset the state for the next interval. Return when we would next want + * to write served desc stats or 0 if we won't want to write. */ +time_t +rep_hist_desc_stats_write(time_t now) +{ + char *filename = NULL, *str = NULL; + + if (!start_of_served_descs_stats_interval) + return 0; /* We're not collecting stats. */ + if (start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL > now) + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; + + str = rep_hist_format_desc_stats(now); + tor_assert(str != NULL); + + if (check_or_create_data_subdir("stats") < 0) { + goto done; + } + filename = get_datadir_fname2("stats", "served-desc-stats"); + if (append_bytes_to_file(filename, str, strlen(str), 0) < 0) + log_warn(LD_HIST, "Unable to write served descs statistics to disk!"); + + rep_hist_reset_desc_stats(now); + + done: + tor_free(filename); + tor_free(str); + return start_of_served_descs_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Called to note that we've served a given descriptor (by + * digest). Increments the count of descriptors served, and the number + * of times we've served this descriptor. */ +void +rep_hist_note_desc_served(const char * desc) +{ + void *val; + uintptr_t count; + if (!served_descs) + return; // We're not collecting stats + val = digestmap_get(served_descs, desc); + count = (uintptr_t)val; + if (count != INT_MAX) + ++count; + digestmap_set(served_descs, desc, (void*)count); + total_descriptor_downloads++; +} + +/*** Connection statistics ***/ + +/** Start of the current connection stats interval or 0 if we're not + * collecting connection statistics. */ +static time_t start_of_conn_stats_interval; + +/** Initialize connection stats. */ +void +rep_hist_conn_stats_init(time_t now) +{ + start_of_conn_stats_interval = now; +} + +/* Count connections that we read and wrote less than these many bytes + * from/to as below threshold. */ +#define BIDI_THRESHOLD 20480 + +/* Count connections that we read or wrote at least this factor as many + * bytes from/to than we wrote or read to/from as mostly reading or + * writing. */ +#define BIDI_FACTOR 10 + +/* Interval length in seconds for considering read and written bytes for + * connection stats. */ +#define BIDI_INTERVAL 10 + +/** Start of next BIDI_INTERVAL second interval. */ +static time_t bidi_next_interval = 0; + +/** Number of connections that we read and wrote less than BIDI_THRESHOLD + * bytes from/to in BIDI_INTERVAL seconds. */ +static uint32_t below_threshold = 0; + +/** Number of connections that we read at least BIDI_FACTOR times more + * bytes from than we wrote to in BIDI_INTERVAL seconds. */ +static uint32_t mostly_read = 0; + +/** Number of connections that we wrote at least BIDI_FACTOR times more + * bytes to than we read from in BIDI_INTERVAL seconds. */ +static uint32_t mostly_written = 0; + +/** Number of connections that we read and wrote at least BIDI_THRESHOLD + * bytes from/to, but not BIDI_FACTOR times more in either direction in + * BIDI_INTERVAL seconds. */ +static uint32_t both_read_and_written = 0; + +/** Entry in a map from connection ID to the number of read and written + * bytes on this connection in a BIDI_INTERVAL second interval. */ +typedef struct bidi_map_entry_t { + HT_ENTRY(bidi_map_entry_t) node; + uint64_t conn_id; /**< Connection ID */ + size_t read; /**< Number of read bytes */ + size_t written; /**< Number of written bytes */ +} bidi_map_entry_t; + +/** Map of OR connections together with the number of read and written + * bytes in the current BIDI_INTERVAL second interval. */ +static HT_HEAD(bidimap, bidi_map_entry_t) bidi_map = + HT_INITIALIZER(); + +static int +bidi_map_ent_eq(const bidi_map_entry_t *a, const bidi_map_entry_t *b) +{ + return a->conn_id == b->conn_id; +} + +/* DOCDOC bidi_map_ent_hash */ +static unsigned +bidi_map_ent_hash(const bidi_map_entry_t *entry) +{ + return (unsigned) entry->conn_id; +} + +HT_PROTOTYPE(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq) +HT_GENERATE2(bidimap, bidi_map_entry_t, node, bidi_map_ent_hash, + bidi_map_ent_eq, 0.6, tor_reallocarray_, tor_free_) + +/* DOCDOC bidi_map_free */ +static void +bidi_map_free_all(void) +{ + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); + } + HT_CLEAR(bidimap, &bidi_map); +} + +/** Reset counters for conn statistics. */ +void +rep_hist_reset_conn_stats(time_t now) +{ + start_of_conn_stats_interval = now; + below_threshold = 0; + mostly_read = 0; + mostly_written = 0; + both_read_and_written = 0; + bidi_map_free_all(); +} + +/** Stop collecting connection stats in a way that we can re-start doing + * so in rep_hist_conn_stats_init(). */ +void +rep_hist_conn_stats_term(void) +{ + rep_hist_reset_conn_stats(0); +} + +/** We read <b>num_read</b> bytes and wrote <b>num_written</b> from/to OR + * connection <b>conn_id</b> in second <b>when</b>. If this is the first + * observation in a new interval, sum up the last observations. Add bytes + * for this connection. */ +void +rep_hist_note_or_conn_bytes(uint64_t conn_id, size_t num_read, + size_t num_written, time_t when) +{ + if (!start_of_conn_stats_interval) + return; + /* Initialize */ + if (bidi_next_interval == 0) + bidi_next_interval = when + BIDI_INTERVAL; + /* Sum up last period's statistics */ + if (when >= bidi_next_interval) { + bidi_map_entry_t **ptr, **next, *ent; + for (ptr = HT_START(bidimap, &bidi_map); ptr; ptr = next) { + ent = *ptr; + if (ent->read + ent->written < BIDI_THRESHOLD) + below_threshold++; + else if (ent->read >= ent->written * BIDI_FACTOR) + mostly_read++; + else if (ent->written >= ent->read * BIDI_FACTOR) + mostly_written++; + else + both_read_and_written++; + next = HT_NEXT_RMV(bidimap, &bidi_map, ptr); + tor_free(ent); + } + while (when >= bidi_next_interval) + bidi_next_interval += BIDI_INTERVAL; + log_info(LD_GENERAL, "%d below threshold, %d mostly read, " + "%d mostly written, %d both read and written.", + below_threshold, mostly_read, mostly_written, + both_read_and_written); + } + /* Add this connection's bytes. */ + if (num_read > 0 || num_written > 0) { + bidi_map_entry_t *entry, lookup; + lookup.conn_id = conn_id; + entry = HT_FIND(bidimap, &bidi_map, &lookup); + if (entry) { + entry->written += num_written; + entry->read += num_read; + } else { + entry = tor_malloc_zero(sizeof(bidi_map_entry_t)); + entry->conn_id = conn_id; + entry->written = num_written; + entry->read = num_read; + HT_INSERT(bidimap, &bidi_map, entry); + } + } +} + +/** Return a newly allocated string containing the connection statistics + * until <b>now</b>, or NULL if we're not collecting conn stats. Caller must + * ensure start_of_conn_stats_interval is in the past. */ +char * +rep_hist_format_conn_stats(time_t now) +{ + char *result, written[ISO_TIME_LEN+1]; + + if (!start_of_conn_stats_interval) + return NULL; /* Not initialized. */ + + tor_assert(now >= start_of_conn_stats_interval); + + format_iso_time(written, now); + tor_asprintf(&result, "conn-bi-direct %s (%d s) %d,%d,%d,%d\n", + written, + (unsigned) (now - start_of_conn_stats_interval), + below_threshold, + mostly_read, + mostly_written, + both_read_and_written); + return result; +} + +/** If 24 hours have passed since the beginning of the current conn stats + * period, write conn stats to $DATADIR/stats/conn-stats (possibly + * overwriting an existing file) and reset counters. Return when we would + * next want to write conn stats or 0 if we never want to write. */ +time_t +rep_hist_conn_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_conn_stats_interval) + return 0; /* Not initialized. */ + if (start_of_conn_stats_interval + WRITE_STATS_INTERVAL > now) + goto done; /* Not ready to write */ + + /* Generate history string. */ + str = rep_hist_format_conn_stats(now); + + /* Reset counters. */ + rep_hist_reset_conn_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "conn-stats", str, "connection statistics"); + } + + done: + tor_free(str); + return start_of_conn_stats_interval + WRITE_STATS_INTERVAL; +} + +/** Internal statistics to track how many requests of each type of + * handshake we've received, and how many we've assigned to cpuworkers. + * Useful for seeing trends in cpu load. + * @{ */ +STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; +STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0}; +/**@}*/ + +/** A new onionskin (using the <b>type</b> handshake) has arrived. */ +void +rep_hist_note_circuit_handshake_requested(uint16_t type) +{ + if (type <= MAX_ONION_HANDSHAKE_TYPE) + onion_handshakes_requested[type]++; +} + +/** We've sent an onionskin (using the <b>type</b> handshake) to a + * cpuworker. */ +void +rep_hist_note_circuit_handshake_assigned(uint16_t type) +{ + if (type <= MAX_ONION_HANDSHAKE_TYPE) + onion_handshakes_assigned[type]++; +} + +/** Log our onionskin statistics since the last time we were called. */ +void +rep_hist_log_circuit_handshake_stats(time_t now) +{ + (void)now; + log_notice(LD_HEARTBEAT, "Circuit handshake stats since last time: " + "%d/%d TAP, %d/%d NTor.", + onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_TAP], + onion_handshakes_requested[ONION_HANDSHAKE_TYPE_TAP], + onion_handshakes_assigned[ONION_HANDSHAKE_TYPE_NTOR], + onion_handshakes_requested[ONION_HANDSHAKE_TYPE_NTOR]); + memset(onion_handshakes_assigned, 0, sizeof(onion_handshakes_assigned)); + memset(onion_handshakes_requested, 0, sizeof(onion_handshakes_requested)); +} + +/* Hidden service statistics section */ + +/** Start of the current hidden service stats interval or 0 if we're + * not collecting hidden service statistics. */ +static time_t start_of_hs_stats_interval; + +/** Carries the various hidden service statistics, and any other + * information needed. */ +typedef struct hs_stats_t { + /** How many relay cells have we seen as rendezvous points? */ + uint64_t rp_relay_cells_seen; + + /** Set of unique public key digests we've seen this stat period + * (could also be implemented as sorted smartlist). */ + digestmap_t *onions_seen_this_period; +} hs_stats_t; + +/** Our statistics structure singleton. */ +static hs_stats_t *hs_stats = NULL; + +/** Allocate, initialize and return an hs_stats_t structure. */ +static hs_stats_t * +hs_stats_new(void) +{ + hs_stats_t *new_hs_stats = tor_malloc_zero(sizeof(hs_stats_t)); + new_hs_stats->onions_seen_this_period = digestmap_new(); + + return new_hs_stats; +} + +#define hs_stats_free(val) \ + FREE_AND_NULL(hs_stats_t, hs_stats_free_, (val)) + +/** Free an hs_stats_t structure. */ +static void +hs_stats_free_(hs_stats_t *victim_hs_stats) +{ + if (!victim_hs_stats) { + return; + } + + digestmap_free(victim_hs_stats->onions_seen_this_period, NULL); + tor_free(victim_hs_stats); +} + +/** Initialize hidden service statistics. */ +void +rep_hist_hs_stats_init(time_t now) +{ + if (!hs_stats) { + hs_stats = hs_stats_new(); + } + + start_of_hs_stats_interval = now; +} + +/** Clear history of hidden service statistics and set the measurement + * interval start to <b>now</b>. */ +static void +rep_hist_reset_hs_stats(time_t now) +{ + if (!hs_stats) { + hs_stats = hs_stats_new(); + } + + hs_stats->rp_relay_cells_seen = 0; + + digestmap_free(hs_stats->onions_seen_this_period, NULL); + hs_stats->onions_seen_this_period = digestmap_new(); + + start_of_hs_stats_interval = now; +} + +/** Stop collecting hidden service stats in a way that we can re-start + * doing so in rep_hist_buffer_stats_init(). */ +void +rep_hist_hs_stats_term(void) +{ + rep_hist_reset_hs_stats(0); +} + +/** We saw a new HS relay cell, Count it! */ +void +rep_hist_seen_new_rp_cell(void) +{ + if (!hs_stats) { + return; // We're not collecting stats + } + + hs_stats->rp_relay_cells_seen++; +} + +/** As HSDirs, we saw another hidden service with public key + * <b>pubkey</b>. Check whether we have counted it before, if not + * count it now! */ +void +rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey) +{ + char pubkey_hash[DIGEST_LEN]; + + if (!hs_stats) { + return; // We're not collecting stats + } + + /* Get the digest of the pubkey which will be used to detect whether + we've seen this hidden service before or not. */ + if (crypto_pk_get_digest(pubkey, pubkey_hash) < 0) { + /* This fail should not happen; key has been validated by + descriptor parsing code first. */ + return; + } + + /* Check if this is the first time we've seen this hidden + service. If it is, count it as new. */ + if (!digestmap_get(hs_stats->onions_seen_this_period, + pubkey_hash)) { + digestmap_set(hs_stats->onions_seen_this_period, + pubkey_hash, (void*)(uintptr_t)1); + } +} + +/* The number of cells that are supposed to be hidden from the adversary + * by adding noise from the Laplace distribution. This value, divided by + * EPSILON, is Laplace parameter b. It must be greather than 0. */ +#define REND_CELLS_DELTA_F 2048 +/* Security parameter for obfuscating number of cells with a value between + * ]0.0, 1.0]. Smaller values obfuscate observations more, but at the same + * time make statistics less usable. */ +#define REND_CELLS_EPSILON 0.3 +/* The number of cells that are supposed to be hidden from the adversary + * by rounding up to the next multiple of this number. */ +#define REND_CELLS_BIN_SIZE 1024 +/* The number of service identities that are supposed to be hidden from the + * adversary by adding noise from the Laplace distribution. This value, + * divided by EPSILON, is Laplace parameter b. It must be greater than 0. */ +#define ONIONS_SEEN_DELTA_F 8 +/* Security parameter for obfuscating number of service identities with a + * value between ]0.0, 1.0]. Smaller values obfuscate observations more, but + * at the same time make statistics less usable. */ +#define ONIONS_SEEN_EPSILON 0.3 +/* The number of service identities that are supposed to be hidden from + * the adversary by rounding up to the next multiple of this number. */ +#define ONIONS_SEEN_BIN_SIZE 8 + +/** Allocate and return a string containing hidden service stats that + * are meant to be placed in the extra-info descriptor. */ +static char * +rep_hist_format_hs_stats(time_t now) +{ + char t[ISO_TIME_LEN+1]; + char *hs_stats_string; + int64_t obfuscated_cells_seen; + int64_t obfuscated_onions_seen; + + uint64_t rounded_cells_seen + = round_uint64_to_next_multiple_of(hs_stats->rp_relay_cells_seen, + REND_CELLS_BIN_SIZE); + rounded_cells_seen = MIN(rounded_cells_seen, INT64_MAX); + obfuscated_cells_seen = add_laplace_noise((int64_t)rounded_cells_seen, + crypto_rand_double(), + REND_CELLS_DELTA_F, REND_CELLS_EPSILON); + + uint64_t rounded_onions_seen = + round_uint64_to_next_multiple_of((size_t)digestmap_size( + hs_stats->onions_seen_this_period), + ONIONS_SEEN_BIN_SIZE); + rounded_onions_seen = MIN(rounded_onions_seen, INT64_MAX); + obfuscated_onions_seen = add_laplace_noise((int64_t)rounded_onions_seen, + crypto_rand_double(), ONIONS_SEEN_DELTA_F, + ONIONS_SEEN_EPSILON); + + format_iso_time(t, now); + tor_asprintf(&hs_stats_string, "hidserv-stats-end %s (%d s)\n" + "hidserv-rend-relayed-cells %"PRId64" delta_f=%d " + "epsilon=%.2f bin_size=%d\n" + "hidserv-dir-onions-seen %"PRId64" delta_f=%d " + "epsilon=%.2f bin_size=%d\n", + t, (unsigned) (now - start_of_hs_stats_interval), + (obfuscated_cells_seen), REND_CELLS_DELTA_F, + REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE, + (obfuscated_onions_seen), + ONIONS_SEEN_DELTA_F, + ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE); + + return hs_stats_string; +} + +/** If 24 hours have passed since the beginning of the current HS + * stats period, write buffer stats to $DATADIR/stats/hidserv-stats + * (possibly overwriting an existing file) and reset counters. Return + * when we would next want to write buffer stats or 0 if we never want to + * write. */ +time_t +rep_hist_hs_stats_write(time_t now) +{ + char *str = NULL; + + if (!start_of_hs_stats_interval) { + return 0; /* Not initialized. */ + } + + if (start_of_hs_stats_interval + WRITE_STATS_INTERVAL > now) { + goto done; /* Not ready to write */ + } + + /* Generate history string. */ + str = rep_hist_format_hs_stats(now); + + /* Reset HS history. */ + rep_hist_reset_hs_stats(now); + + /* Try to write to disk. */ + if (!check_or_create_data_subdir("stats")) { + write_to_data_subdir("stats", "hidserv-stats", str, + "hidden service stats"); + } + + done: + tor_free(str); + return start_of_hs_stats_interval + WRITE_STATS_INTERVAL; +} + +static uint64_t link_proto_count[MAX_LINK_PROTO+1][2]; + +/** Note that we negotiated link protocol version <b>link_proto</b>, on + * a connection that started here iff <b>started_here</b> is true. + */ +void +rep_hist_note_negotiated_link_proto(unsigned link_proto, int started_here) +{ + started_here = !!started_here; /* force to 0 or 1 */ + if (link_proto > MAX_LINK_PROTO) { + log_warn(LD_BUG, "Can't log link protocol %u", link_proto); + return; + } + + link_proto_count[link_proto][started_here]++; +} + +/** + * Update the maximum count of total pending channel padding timers + * in this period. + */ +void +rep_hist_padding_count_timers(uint64_t num_timers) +{ + if (num_timers > padding_current.maximum_chanpad_timers) { + padding_current.maximum_chanpad_timers = num_timers; + } +} + +/** + * Count a cell that we sent for padding overhead statistics. + * + * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be + * counted for PADDING_TYPE_TOTAL. + */ +void +rep_hist_padding_count_write(padding_type_t type) +{ + switch (type) { + case PADDING_TYPE_DROP: + padding_current.write_drop_cell_count++; + break; + case PADDING_TYPE_CELL: + padding_current.write_pad_cell_count++; + break; + case PADDING_TYPE_TOTAL: + padding_current.write_cell_count++; + break; + case PADDING_TYPE_ENABLED_TOTAL: + padding_current.enabled_write_cell_count++; + break; + case PADDING_TYPE_ENABLED_CELL: + padding_current.enabled_write_pad_cell_count++; + break; + } +} + +/** + * Count a cell that we've received for padding overhead statistics. + * + * RELAY_COMMAND_DROP and CELL_PADDING are accounted separately. Both should be + * counted for PADDING_TYPE_TOTAL. + */ +void +rep_hist_padding_count_read(padding_type_t type) +{ + switch (type) { + case PADDING_TYPE_DROP: + padding_current.read_drop_cell_count++; + break; + case PADDING_TYPE_CELL: + padding_current.read_pad_cell_count++; + break; + case PADDING_TYPE_TOTAL: + padding_current.read_cell_count++; + break; + case PADDING_TYPE_ENABLED_TOTAL: + padding_current.enabled_read_cell_count++; + break; + case PADDING_TYPE_ENABLED_CELL: + padding_current.enabled_read_pad_cell_count++; + break; + } +} + +/** + * Reset our current padding statistics. Called once every 24 hours. + */ +void +rep_hist_reset_padding_counts(void) +{ + memset(&padding_current, 0, sizeof(padding_current)); +} + +/** + * Copy our current cell counts into a structure for listing in our + * extra-info descriptor. Also perform appropriate rounding and redaction. + * + * This function is called once every 24 hours. + */ +#define MIN_CELL_COUNTS_TO_PUBLISH 1 +#define ROUND_CELL_COUNTS_TO 10000 +void +rep_hist_prep_published_padding_counts(time_t now) +{ + memcpy(&padding_published, &padding_current, sizeof(padding_published)); + + if (padding_published.read_cell_count < MIN_CELL_COUNTS_TO_PUBLISH || + padding_published.write_cell_count < MIN_CELL_COUNTS_TO_PUBLISH) { + memset(&padding_published, 0, sizeof(padding_published)); + return; + } + + format_iso_time(padding_published.first_published_at, now); +#define ROUND_AND_SET_COUNT(x) (x) = round_uint64_to_next_multiple_of((x), \ + ROUND_CELL_COUNTS_TO) + ROUND_AND_SET_COUNT(padding_published.read_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.read_drop_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_drop_cell_count); + ROUND_AND_SET_COUNT(padding_published.write_cell_count); + ROUND_AND_SET_COUNT(padding_published.read_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_read_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_read_pad_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_write_cell_count); + ROUND_AND_SET_COUNT(padding_published.enabled_write_pad_cell_count); +#undef ROUND_AND_SET_COUNT +} + +/** + * Returns an allocated string for extra-info documents for publishing + * padding statistics from the last 24 hour interval. + */ +char * +rep_hist_get_padding_count_lines(void) +{ + char *result = NULL; + + if (!padding_published.read_cell_count || + !padding_published.write_cell_count) { + return NULL; + } + + tor_asprintf(&result, "padding-counts %s (%d s)" + " bin-size=%"PRIu64 + " write-drop=%"PRIu64 + " write-pad=%"PRIu64 + " write-total=%"PRIu64 + " read-drop=%"PRIu64 + " read-pad=%"PRIu64 + " read-total=%"PRIu64 + " enabled-read-pad=%"PRIu64 + " enabled-read-total=%"PRIu64 + " enabled-write-pad=%"PRIu64 + " enabled-write-total=%"PRIu64 + " max-chanpad-timers=%"PRIu64 + "\n", + padding_published.first_published_at, + REPHIST_CELL_PADDING_COUNTS_INTERVAL, + (uint64_t)ROUND_CELL_COUNTS_TO, + (padding_published.write_drop_cell_count), + (padding_published.write_pad_cell_count), + (padding_published.write_cell_count), + (padding_published.read_drop_cell_count), + (padding_published.read_pad_cell_count), + (padding_published.read_cell_count), + (padding_published.enabled_read_pad_cell_count), + (padding_published.enabled_read_cell_count), + (padding_published.enabled_write_pad_cell_count), + (padding_published.enabled_write_cell_count), + (padding_published.maximum_chanpad_timers) + ); + + return result; +} + +/** Log a heartbeat message explaining how many connections of each link + * protocol version we have used. + */ +void +rep_hist_log_link_protocol_counts(void) +{ + log_notice(LD_HEARTBEAT, + "Since startup, we have initiated " + "%"PRIu64" v1 connections, " + "%"PRIu64" v2 connections, " + "%"PRIu64" v3 connections, and " + "%"PRIu64" v4 connections; and received " + "%"PRIu64" v1 connections, " + "%"PRIu64" v2 connections, " + "%"PRIu64" v3 connections, and " + "%"PRIu64" v4 connections.", + (link_proto_count[1][1]), + (link_proto_count[2][1]), + (link_proto_count[3][1]), + (link_proto_count[4][1]), + (link_proto_count[1][0]), + (link_proto_count[2][0]), + (link_proto_count[3][0]), + (link_proto_count[4][0])); +} + +/** Free all storage held by the OR/link history caches, by the + * bandwidth history arrays, by the port history, or by statistics . */ +void +rep_hist_free_all(void) +{ + hs_stats_free(hs_stats); + digestmap_free(history_map, free_or_history); + + bw_array_free(read_array); + read_array = NULL; + + bw_array_free(write_array); + write_array = NULL; + + bw_array_free(dir_read_array); + dir_read_array = NULL; + + bw_array_free(dir_write_array); + dir_write_array = NULL; + + tor_free(exit_bytes_read); + tor_free(exit_bytes_written); + tor_free(exit_streams); + predicted_ports_free_all(); + bidi_map_free_all(); + + if (circuits_for_buffer_stats) { + SMARTLIST_FOREACH(circuits_for_buffer_stats, circ_buffer_stats_t *, s, + tor_free(s)); + smartlist_free(circuits_for_buffer_stats); + circuits_for_buffer_stats = NULL; + } + rep_hist_desc_stats_term(); + total_descriptor_downloads = 0; + + tor_assert_nonfatal(rephist_total_alloc == 0); + tor_assert_nonfatal_once(rephist_total_num == 0); +} diff --git a/src/feature/stats/rephist.h b/src/feature/stats/rephist.h new file mode 100644 index 0000000000..06a5e48211 --- /dev/null +++ b/src/feature/stats/rephist.h @@ -0,0 +1,140 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file rephist.h + * \brief Header file for rephist.c. + **/ + +#ifndef TOR_REPHIST_H +#define TOR_REPHIST_H + +void rep_hist_init(void); +void rep_hist_dump_stats(time_t now, int severity); +void rep_hist_note_bytes_read(size_t num_bytes, time_t when); +void rep_hist_note_bytes_written(size_t num_bytes, time_t when); + +void rep_hist_make_router_pessimal(const char *id, time_t when); + +void rep_hist_note_dir_bytes_read(size_t num_bytes, time_t when); +void rep_hist_note_dir_bytes_written(size_t num_bytes, time_t when); + +int rep_hist_bandwidth_assess(void); +char *rep_hist_get_bandwidth_lines(void); +void rep_hist_update_state(or_state_t *state); +int rep_hist_load_state(or_state_t *state, char **err); +void rep_history_clean(time_t before); + +void rep_hist_note_router_reachable(const char *id, const tor_addr_t *at_addr, + const uint16_t at_port, time_t when); +void rep_hist_note_router_unreachable(const char *id, time_t when); +int rep_hist_record_mtbf_data(time_t now, int missing_means_down); +int rep_hist_load_mtbf_data(time_t now); + +time_t rep_hist_downrate_old_runs(time_t now); +long rep_hist_get_uptime(const char *id, time_t when); +double rep_hist_get_stability(const char *id, time_t when); +double rep_hist_get_weighted_fractional_uptime(const char *id, time_t when); +long rep_hist_get_weighted_time_known(const char *id, time_t when); +int rep_hist_have_measured_enough_stability(void); + +void predicted_ports_init(void); +void rep_hist_note_used_port(time_t now, uint16_t port); +smartlist_t *rep_hist_get_predicted_ports(time_t now); +void rep_hist_remove_predicted_ports(const smartlist_t *rmv_ports); +void rep_hist_note_used_resolve(time_t now); +void rep_hist_note_used_internal(time_t now, int need_uptime, + int need_capacity); +int rep_hist_get_predicted_internal(time_t now, int *need_uptime, + int *need_capacity); + +int any_predicted_circuits(time_t now); +int rep_hist_circbuilding_dormant(time_t now); +int predicted_ports_prediction_time_remaining(time_t now); + +void rep_hist_exit_stats_init(time_t now); +void rep_hist_reset_exit_stats(time_t now); +void rep_hist_exit_stats_term(void); +char *rep_hist_format_exit_stats(time_t now); +time_t rep_hist_exit_stats_write(time_t now); +void rep_hist_note_exit_bytes(uint16_t port, size_t num_written, + size_t num_read); +void rep_hist_note_exit_stream_opened(uint16_t port); + +void rep_hist_buffer_stats_init(time_t now); +void rep_hist_buffer_stats_add_circ(circuit_t *circ, + time_t end_of_interval); +time_t rep_hist_buffer_stats_write(time_t now); +void rep_hist_buffer_stats_term(void); +void rep_hist_add_buffer_stats(double mean_num_cells_in_queue, + double mean_time_cells_in_queue, uint32_t processed_cells); +char *rep_hist_format_buffer_stats(time_t now); +void rep_hist_reset_buffer_stats(time_t now); + +void rep_hist_desc_stats_init(time_t now); +void rep_hist_note_desc_served(const char * desc); +void rep_hist_desc_stats_term(void); +time_t rep_hist_desc_stats_write(time_t now); + +void rep_hist_conn_stats_init(time_t now); +void rep_hist_note_or_conn_bytes(uint64_t conn_id, size_t num_read, + size_t num_written, time_t when); +void rep_hist_reset_conn_stats(time_t now); +char *rep_hist_format_conn_stats(time_t now); +time_t rep_hist_conn_stats_write(time_t now); +void rep_hist_conn_stats_term(void); + +void rep_hist_note_circuit_handshake_requested(uint16_t type); +void rep_hist_note_circuit_handshake_assigned(uint16_t type); +void rep_hist_log_circuit_handshake_stats(time_t now); + +void rep_hist_hs_stats_init(time_t now); +void rep_hist_hs_stats_term(void); +time_t rep_hist_hs_stats_write(time_t now); +char *rep_hist_get_hs_stats_string(void); +void rep_hist_seen_new_rp_cell(void); +void rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey); + +void rep_hist_free_all(void); + +void rep_hist_note_negotiated_link_proto(unsigned link_proto, + int started_here); +void rep_hist_log_link_protocol_counts(void); + +extern uint64_t rephist_total_alloc; +extern uint32_t rephist_total_num; +#ifdef TOR_UNIT_TESTS +extern int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1]; +extern int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1]; +#endif + +/** + * Represents the type of a cell for padding accounting + */ +typedef enum padding_type_t { + /** A RELAY_DROP cell */ + PADDING_TYPE_DROP, + /** A CELL_PADDING cell */ + PADDING_TYPE_CELL, + /** Total counts of padding and non-padding together */ + PADDING_TYPE_TOTAL, + /** Total cell counts for all padding-enabled channels */ + PADDING_TYPE_ENABLED_TOTAL, + /** CELL_PADDING counts for all padding-enabled channels */ + PADDING_TYPE_ENABLED_CELL +} padding_type_t; + +/** The amount of time over which the padding cell counts were counted */ +#define REPHIST_CELL_PADDING_COUNTS_INTERVAL (24*60*60) +void rep_hist_padding_count_read(padding_type_t type); +void rep_hist_padding_count_write(padding_type_t type); +char *rep_hist_get_padding_count_lines(void); +void rep_hist_reset_padding_counts(void); +void rep_hist_prep_published_padding_counts(time_t now); +void rep_hist_padding_count_timers(uint64_t num_timers); + +#endif /* !defined(TOR_REPHIST_H) */ + |