diff options
Diffstat (limited to 'src/feature/relay')
-rw-r--r-- | src/feature/relay/dns.c | 15 | ||||
-rw-r--r-- | src/feature/relay/include.am | 2 | ||||
-rw-r--r-- | src/feature/relay/onion_queue.c | 1 | ||||
-rw-r--r-- | src/feature/relay/relay_metrics.c | 401 | ||||
-rw-r--r-- | src/feature/relay/relay_metrics.h | 55 | ||||
-rw-r--r-- | src/feature/relay/relay_periodic.c | 2 | ||||
-rw-r--r-- | src/feature/relay/relay_sys.c | 5 | ||||
-rw-r--r-- | src/feature/relay/selftest.c | 4 |
8 files changed, 483 insertions, 2 deletions
diff --git a/src/feature/relay/dns.c b/src/feature/relay/dns.c index 6a703f2ab3..c6e0439338 100644 --- a/src/feature/relay/dns.c +++ b/src/feature/relay/dns.c @@ -1539,6 +1539,16 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses, tor_addr_make_unspec(&addr); + /* Note down any DNS errors to the statistics module */ + if (result == DNS_ERR_TIMEOUT) { + /* libevent timed out while resolving a name. However, because libevent + * handles retries and timeouts internally, this means that all attempts of + * libevent timed out. If we wanted to get more granular information about + * individual libevent attempts, we would have to implement our own DNS + * timeout/retry logic */ + rep_hist_note_overload(OVERLOAD_GENERAL); + } + /* Keep track of whether IPv6 is working */ if (type == DNS_IPv6_AAAA) { if (result == DNS_ERR_TIMEOUT) { @@ -1642,7 +1652,7 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses, /* The result can be changed within this function thus why we note the result * at the end. */ - rep_hist_note_dns_query(type, result); + rep_hist_note_dns_error(type, result); tor_free(arg_); } @@ -1662,6 +1672,9 @@ launch_one_resolve(const char *address, uint8_t query_type, addr[0] = (char) query_type; memcpy(addr+1, address, addr_len + 1); + /* Note the query for our statistics. */ + rep_hist_note_dns_request(query_type); + switch (query_type) { case DNS_IPv4_A: req = evdns_base_resolve_ipv4(the_evdns_base, diff --git a/src/feature/relay/include.am b/src/feature/relay/include.am index 84bb1ff35e..8a121cef01 100644 --- a/src/feature/relay/include.am +++ b/src/feature/relay/include.am @@ -15,6 +15,7 @@ MODULE_RELAY_SOURCES = \ src/feature/relay/routermode.c \ src/feature/relay/relay_config.c \ src/feature/relay/relay_handshake.c \ + src/feature/relay/relay_metrics.c \ src/feature/relay/relay_periodic.c \ src/feature/relay/relay_sys.c \ src/feature/relay/routerkeys.c \ @@ -30,6 +31,7 @@ noinst_HEADERS += \ src/feature/relay/onion_queue.h \ src/feature/relay/relay_config.h \ src/feature/relay/relay_handshake.h \ + src/feature/relay/relay_metrics.h \ src/feature/relay/relay_periodic.h \ src/feature/relay/relay_sys.h \ src/feature/relay/relay_find_addr.h \ diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c index 85ec0dc74a..c09f4d5b9b 100644 --- a/src/feature/relay/onion_queue.c +++ b/src/feature/relay/onion_queue.c @@ -164,6 +164,7 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin) #define WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL (60) static ratelim_t last_warned = RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL); + rep_hist_note_circuit_handshake_dropped(onionskin->handshake_type); if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) { char *m; /* Note this ntor onionskin drop as an overload */ diff --git a/src/feature/relay/relay_metrics.c b/src/feature/relay/relay_metrics.c new file mode 100644 index 0000000000..fc8eb10d1b --- /dev/null +++ b/src/feature/relay/relay_metrics.c @@ -0,0 +1,401 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * @file relay_metrics.c + * @brief Relay metrics exposed through the MetricsPort + **/ + +#define RELAY_METRICS_ENTRY_PRIVATE + +#include "orconfig.h" + +#include "core/or/or.h" +#include "core/or/relay.h" + +#include "lib/malloc/malloc.h" +#include "lib/container/smartlist.h" +#include "lib/metrics/metrics_store.h" +#include "lib/log/util_bug.h" + +#include "feature/relay/relay_metrics.h" +#include "feature/stats/rephist.h" + +#include <event2/dns.h> + +/** Declarations of each fill function for metrics defined in base_metrics. */ +static void fill_dns_error_values(void); +static void fill_dns_query_values(void); +static void fill_global_bw_limit_values(void); +static void fill_socket_values(void); +static void fill_onionskins_values(void); +static void fill_oom_values(void); +static void fill_tcp_exhaustion_values(void); + +/** The base metrics that is a static array of metrics added to the metrics + * store. + * + * The key member MUST be also the index of the entry in the array. */ +static const relay_metrics_entry_t base_metrics[] = +{ + { + .key = RELAY_METRICS_NUM_OOM_BYTES, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_load_oom_bytes_total), + .help = "Total number of bytes the OOM has freed by subsystem", + .fill_fn = fill_oom_values, + }, + { + .key = RELAY_METRICS_NUM_ONIONSKINS, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_load_onionskins_total), + .help = "Total number of onionskins handled", + .fill_fn = fill_onionskins_values, + }, + { + .key = RELAY_METRICS_NUM_SOCKETS, + .type = METRICS_TYPE_GAUGE, + .name = METRICS_NAME(relay_load_socket_total), + .help = "Total number of sockets", + .fill_fn = fill_socket_values, + }, + { + .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_load_global_rate_limit_reached_total), + .help = "Total number of global connection bucket limit reached", + .fill_fn = fill_global_bw_limit_values, + }, + { + .key = RELAY_METRICS_NUM_DNS, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_exit_dns_query_total), + .help = "Total number of DNS queries done by this relay", + .fill_fn = fill_dns_query_values, + }, + { + .key = RELAY_METRICS_NUM_DNS_ERRORS, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_exit_dns_error_total), + .help = "Total number of DNS errors encountered by this relay", + .fill_fn = fill_dns_error_values, + }, + { + .key = RELAY_METRICS_NUM_TCP_EXHAUSTION, + .type = METRICS_TYPE_COUNTER, + .name = METRICS_NAME(relay_load_tcp_exhaustion_total), + .help = "Total number of times we ran out of TCP ports", + .fill_fn = fill_tcp_exhaustion_values, + }, +}; +static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics); + +/** The only and single store of all the relay metrics. */ +static metrics_store_t *the_store; + +/** Helper function to convert an handshake type into a string. */ +static inline const char * +handshake_type_to_str(const uint16_t type) +{ + switch (type) { + case ONION_HANDSHAKE_TYPE_TAP: + return "tap"; + case ONION_HANDSHAKE_TYPE_FAST: + return "fast"; + case ONION_HANDSHAKE_TYPE_NTOR: + return "ntor"; + default: + // LCOV_EXCL_START + tor_assert_unreached(); + // LCOV_EXCL_STOP + } +} + +/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */ +static void +fill_tcp_exhaustion_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION]; + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion()); +} + +/* NOTE: Disable the record type label until libevent is fixed. */ +#if 0 +/** Helper array containing mapping for the name of the different DNS records + * and their corresponding libevent values. */ +static struct dns_type { + const char *name; + uint8_t type; +} dns_types[] = { + { .name = "A", .type = DNS_IPv4_A }, + { .name = "PTR", .type = DNS_PTR }, + { .name = "AAAA", .type = DNS_IPv6_AAAA }, +}; +static const size_t num_dns_types = ARRAY_LENGTH(dns_types); +#endif + +/** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */ +static void +fill_dns_error_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS]; + + /* Helper array to map libeven DNS errors to their names and so we can + * iterate over this array to add all metrics. */ + static struct dns_error { + const char *name; + uint8_t key; + } errors[] = { + { .name = "success", .key = DNS_ERR_NONE }, + { .name = "format", .key = DNS_ERR_FORMAT }, + { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED }, + { .name = "notexist", .key = DNS_ERR_NOTEXIST }, + { .name = "notimpl", .key = DNS_ERR_NOTIMPL }, + { .name = "refused", .key = DNS_ERR_REFUSED }, + { .name = "truncated", .key = DNS_ERR_TRUNCATED }, + { .name = "unknown", .key = DNS_ERR_UNKNOWN }, + { .name = "tor_timeout", .key = DNS_ERR_TIMEOUT }, + { .name = "shutdown", .key = DNS_ERR_SHUTDOWN }, + { .name = "cancel", .key = DNS_ERR_CANCEL }, + { .name = "nodata", .key = DNS_ERR_NODATA }, + }; + static const size_t num_errors = ARRAY_LENGTH(errors); + + /* NOTE: Disable the record type label until libevent is fixed. */ +#if 0 + for (size_t i = 0; i < num_dns_types; i++) { + /* Dup the label because metrics_format_label() returns a pointer to a + * string on the stack and we need that label for all metrics. */ + char *record_label = + tor_strdup(metrics_format_label("record", dns_types[i].name)); + + for (size_t j = 0; j < num_errors; j++) { + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, record_label); + metrics_store_entry_add_label(sentry, + metrics_format_label("reason", errors[j].name)); + metrics_store_entry_update(sentry, + rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key)); + } + tor_free(record_label); + } +#endif + + /* Put in the DNS errors, unfortunately not per-type for now. */ + for (size_t j = 0; j < num_errors; j++) { + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("reason", errors[j].name)); + metrics_store_entry_update(sentry, + rep_hist_get_n_dns_error(0, errors[j].key)); + } +} + +/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */ +static void +fill_dns_query_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_DNS]; + + /* NOTE: Disable the record type label until libevent is fixed (#40490). */ +#if 0 + for (size_t i = 0; i < num_dns_types; i++) { + /* Dup the label because metrics_format_label() returns a pointer to a + * string on the stack and we need that label for all metrics. */ + char *record_label = + tor_strdup(metrics_format_label("record", dns_types[i].name)); + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, record_label); + metrics_store_entry_update(sentry, + rep_hist_get_n_dns_request(dns_types[i].type)); + tor_free(record_label); + } +#endif + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0)); +} + +/** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */ +static void +fill_global_bw_limit_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT]; + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("side", "read")); + metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached()); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("side", "write")); + metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached()); +} + +/** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */ +static void +fill_socket_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_SOCKETS]; + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("state", "opened")); + metrics_store_entry_update(sentry, get_n_open_sockets()); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_update(sentry, get_max_sockets()); +} + +/** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */ +static void +fill_onionskins_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_ONIONSKINS]; + + for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) { + /* Dup the label because metrics_format_label() returns a pointer to a + * string on the stack and we need that label for all metrics. */ + char *type_label = + tor_strdup(metrics_format_label("type", handshake_type_to_str(t))); + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, type_label); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "processed")); + metrics_store_entry_update(sentry, + rep_hist_get_circuit_n_handshake_assigned(t)); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, type_label); + metrics_store_entry_add_label(sentry, + metrics_format_label("action", "dropped")); + metrics_store_entry_update(sentry, + rep_hist_get_circuit_n_handshake_dropped(t)); + tor_free(type_label); + } +} + +/** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */ +static void +fill_oom_values(void) +{ + metrics_store_entry_t *sentry; + const relay_metrics_entry_t *rentry = + &base_metrics[RELAY_METRICS_NUM_OOM_BYTES]; + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("subsys", "cell")); + metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("subsys", "dns")); + metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("subsys", "geoip")); + metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip); + + sentry = metrics_store_add(the_store, rentry->type, rentry->name, + rentry->help); + metrics_store_entry_add_label(sentry, + metrics_format_label("subsys", "hsdir")); + metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir); +} + +/** Reset the global store and fill it with all the metrics from base_metrics + * and their associated values. + * + * To pull this off, every metrics has a "fill" function that is called and in + * charge of adding the metrics to the store, appropriate labels and finally + * updating the value to report. */ +static void +fill_store(void) +{ + /* Reset the current store, we are about to fill it with all the things. */ + metrics_store_reset(the_store); + + /* Call the fill function for each metrics. */ + for (size_t i = 0; i < num_base_metrics; i++) { + if (BUG(!base_metrics[i].fill_fn)) { + continue; + } + base_metrics[i].fill_fn(); + } +} + +/** Return a list of all the relay metrics stores. This is the + * function attached to the .get_metrics() member of the subsys_t. */ +const smartlist_t * +relay_metrics_get_stores(void) +{ + /* We can't have the caller to free the returned list so keep it static, + * simply update it. */ + static smartlist_t *stores_list = NULL; + + /* We dynamically fill the store with all the metrics upon a request. The + * reason for this is because the exposed metrics of a relay are often + * internal counters in the fast path and thus we fetch the value when a + * metrics port request arrives instead of keeping a local metrics store of + * those values. */ + fill_store(); + + if (!stores_list) { + stores_list = smartlist_new(); + smartlist_add(stores_list, the_store); + } + + return stores_list; +} + +/** Initialize the relay metrics. */ +void +relay_metrics_init(void) +{ + if (BUG(the_store)) { + return; + } + the_store = metrics_store_new(); +} + +/** Free the relay metrics. */ +void +relay_metrics_free(void) +{ + if (!the_store) { + return; + } + /* NULL is set with this call. */ + metrics_store_free(the_store); +} diff --git a/src/feature/relay/relay_metrics.h b/src/feature/relay/relay_metrics.h new file mode 100644 index 0000000000..00dfeaa624 --- /dev/null +++ b/src/feature/relay/relay_metrics.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * @file relay_metrics.h + * @brief Header for feature/relay/relay_metrics.c + **/ + +#ifndef TOR_FEATURE_RELAY_RELAY_METRICS_H +#define TOR_FEATURE_RELAY_RELAY_METRICS_H + +#include "lib/container/smartlist.h" +#include "lib/metrics/metrics_common.h" + +/** Metrics key for each reported metrics. This key is also used as an index in + * the base_metrics array. */ +typedef enum { + /** Number of OOM invocation. */ + RELAY_METRICS_NUM_OOM_BYTES = 0, + /** Number of onionskines handled. */ + RELAY_METRICS_NUM_ONIONSKINS = 1, + /** Number of sockets. */ + RELAY_METRICS_NUM_SOCKETS = 2, + /** Number of global connection rate limit. */ + RELAY_METRICS_NUM_GLOBAL_RW_LIMIT = 3, + /** Number of DNS queries. */ + RELAY_METRICS_NUM_DNS = 4, + /** Number of DNS query errors. */ + RELAY_METRICS_NUM_DNS_ERRORS = 5, + /** Number of TCP exhaustion reached. */ + RELAY_METRICS_NUM_TCP_EXHAUSTION = 6, +} relay_metrics_key_t; + +/** The metadata of a relay metric. */ +typedef struct relay_metrics_entry_t { + /* Metric key used as a static array index. */ + relay_metrics_key_t key; + /* Metric type. */ + metrics_type_t type; + /* Metrics output name. */ + const char *name; + /* Metrics output help comment. */ + const char *help; + /* Update value function. */ + void (*fill_fn)(void); +} relay_metrics_entry_t; + +/* Init. */ +void relay_metrics_init(void); +void relay_metrics_free(void); + +/* Accessors. */ +const smartlist_t *relay_metrics_get_stores(void); + +#endif /* !defined(TOR_FEATURE_RELAY_RELAY_METRICS_H) */ diff --git a/src/feature/relay/relay_periodic.c b/src/feature/relay/relay_periodic.c index ee94590e01..dd9be4e36f 100644 --- a/src/feature/relay/relay_periodic.c +++ b/src/feature/relay/relay_periodic.c @@ -219,7 +219,7 @@ reachability_warnings_callback(time_t now, const or_options_t *options) tor_asprintf(&where4, "%s:%d", address4, me->ipv4_orport); if (!v6_ok) tor_asprintf(&where6, "[%s]:%d", address6, me->ipv6_orport); - const char *opt_and = (!v4_ok && !v6_ok) ? "and" : ""; + const char *opt_and = (!v4_ok && !v6_ok) ? " and " : ""; /* IPv4 reachability test worked but not the IPv6. We will _not_ * publish the descriptor if our IPv6 was configured. We will if it diff --git a/src/feature/relay/relay_sys.c b/src/feature/relay/relay_sys.c index 25fc0bbd32..9c43734b84 100644 --- a/src/feature/relay/relay_sys.c +++ b/src/feature/relay/relay_sys.c @@ -14,6 +14,7 @@ #include "feature/relay/dns.h" #include "feature/relay/ext_orport.h" +#include "feature/relay/relay_metrics.h" #include "feature/relay/onion_queue.h" #include "feature/relay/relay_periodic.h" #include "feature/relay/relay_sys.h" @@ -25,6 +26,7 @@ static int subsys_relay_initialize(void) { + relay_metrics_init(); relay_register_periodic_events(); return 0; } @@ -37,6 +39,7 @@ subsys_relay_shutdown(void) clear_pending_onions(); routerkeys_free_all(); router_free_all(); + relay_metrics_free(); } const struct subsys_fns_t sys_relay = { @@ -46,4 +49,6 @@ const struct subsys_fns_t sys_relay = { .level = RELAY_SUBSYS_LEVEL, .initialize = subsys_relay_initialize, .shutdown = subsys_relay_shutdown, + + .get_metrics = relay_metrics_get_stores, }; diff --git a/src/feature/relay/selftest.c b/src/feature/relay/selftest.c index 8922d20a19..a791079bd8 100644 --- a/src/feature/relay/selftest.c +++ b/src/feature/relay/selftest.c @@ -254,6 +254,10 @@ router_do_orport_reachability_checks(const routerinfo_t *me, if (ei) { const char *family_name = fmt_af_family(family); const tor_addr_port_t *ap = extend_info_get_orport(ei, family); + if (BUG(!ap)) { + /* Not much we can do here to recover apart from screaming loudly. */ + return; + } log_info(LD_CIRC, "Testing %s of my %s ORPort: %s.", !orport_reachable ? "reachability" : "bandwidth", family_name, fmt_addrport_ap(ap)); |