summaryrefslogtreecommitdiff
path: root/src/feature/relay
diff options
context:
space:
mode:
Diffstat (limited to 'src/feature/relay')
-rw-r--r--src/feature/relay/dns.c15
-rw-r--r--src/feature/relay/include.am2
-rw-r--r--src/feature/relay/onion_queue.c1
-rw-r--r--src/feature/relay/relay_metrics.c401
-rw-r--r--src/feature/relay/relay_metrics.h55
-rw-r--r--src/feature/relay/relay_periodic.c2
-rw-r--r--src/feature/relay/relay_sys.c5
7 files changed, 479 insertions, 2 deletions
diff --git a/src/feature/relay/dns.c b/src/feature/relay/dns.c
index 6a703f2ab3..c6e0439338 100644
--- a/src/feature/relay/dns.c
+++ b/src/feature/relay/dns.c
@@ -1539,6 +1539,16 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses,
tor_addr_make_unspec(&addr);
+ /* Note down any DNS errors to the statistics module */
+ if (result == DNS_ERR_TIMEOUT) {
+ /* libevent timed out while resolving a name. However, because libevent
+ * handles retries and timeouts internally, this means that all attempts of
+ * libevent timed out. If we wanted to get more granular information about
+ * individual libevent attempts, we would have to implement our own DNS
+ * timeout/retry logic */
+ rep_hist_note_overload(OVERLOAD_GENERAL);
+ }
+
/* Keep track of whether IPv6 is working */
if (type == DNS_IPv6_AAAA) {
if (result == DNS_ERR_TIMEOUT) {
@@ -1642,7 +1652,7 @@ evdns_callback(int result, char type, int count, int ttl, void *addresses,
/* The result can be changed within this function thus why we note the result
* at the end. */
- rep_hist_note_dns_query(type, result);
+ rep_hist_note_dns_error(type, result);
tor_free(arg_);
}
@@ -1662,6 +1672,9 @@ launch_one_resolve(const char *address, uint8_t query_type,
addr[0] = (char) query_type;
memcpy(addr+1, address, addr_len + 1);
+ /* Note the query for our statistics. */
+ rep_hist_note_dns_request(query_type);
+
switch (query_type) {
case DNS_IPv4_A:
req = evdns_base_resolve_ipv4(the_evdns_base,
diff --git a/src/feature/relay/include.am b/src/feature/relay/include.am
index 84bb1ff35e..8a121cef01 100644
--- a/src/feature/relay/include.am
+++ b/src/feature/relay/include.am
@@ -15,6 +15,7 @@ MODULE_RELAY_SOURCES = \
src/feature/relay/routermode.c \
src/feature/relay/relay_config.c \
src/feature/relay/relay_handshake.c \
+ src/feature/relay/relay_metrics.c \
src/feature/relay/relay_periodic.c \
src/feature/relay/relay_sys.c \
src/feature/relay/routerkeys.c \
@@ -30,6 +31,7 @@ noinst_HEADERS += \
src/feature/relay/onion_queue.h \
src/feature/relay/relay_config.h \
src/feature/relay/relay_handshake.h \
+ src/feature/relay/relay_metrics.h \
src/feature/relay/relay_periodic.h \
src/feature/relay/relay_sys.h \
src/feature/relay/relay_find_addr.h \
diff --git a/src/feature/relay/onion_queue.c b/src/feature/relay/onion_queue.c
index 85ec0dc74a..c09f4d5b9b 100644
--- a/src/feature/relay/onion_queue.c
+++ b/src/feature/relay/onion_queue.c
@@ -164,6 +164,7 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
#define WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL (60)
static ratelim_t last_warned =
RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL);
+ rep_hist_note_circuit_handshake_dropped(onionskin->handshake_type);
if (onionskin->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) {
char *m;
/* Note this ntor onionskin drop as an overload */
diff --git a/src/feature/relay/relay_metrics.c b/src/feature/relay/relay_metrics.c
new file mode 100644
index 0000000000..fc8eb10d1b
--- /dev/null
+++ b/src/feature/relay/relay_metrics.c
@@ -0,0 +1,401 @@
+/* Copyright (c) 2021, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * @file relay_metrics.c
+ * @brief Relay metrics exposed through the MetricsPort
+ **/
+
+#define RELAY_METRICS_ENTRY_PRIVATE
+
+#include "orconfig.h"
+
+#include "core/or/or.h"
+#include "core/or/relay.h"
+
+#include "lib/malloc/malloc.h"
+#include "lib/container/smartlist.h"
+#include "lib/metrics/metrics_store.h"
+#include "lib/log/util_bug.h"
+
+#include "feature/relay/relay_metrics.h"
+#include "feature/stats/rephist.h"
+
+#include <event2/dns.h>
+
+/** Declarations of each fill function for metrics defined in base_metrics. */
+static void fill_dns_error_values(void);
+static void fill_dns_query_values(void);
+static void fill_global_bw_limit_values(void);
+static void fill_socket_values(void);
+static void fill_onionskins_values(void);
+static void fill_oom_values(void);
+static void fill_tcp_exhaustion_values(void);
+
+/** The base metrics that is a static array of metrics added to the metrics
+ * store.
+ *
+ * The key member MUST be also the index of the entry in the array. */
+static const relay_metrics_entry_t base_metrics[] =
+{
+ {
+ .key = RELAY_METRICS_NUM_OOM_BYTES,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_oom_bytes_total),
+ .help = "Total number of bytes the OOM has freed by subsystem",
+ .fill_fn = fill_oom_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_ONIONSKINS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_onionskins_total),
+ .help = "Total number of onionskins handled",
+ .fill_fn = fill_onionskins_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_SOCKETS,
+ .type = METRICS_TYPE_GAUGE,
+ .name = METRICS_NAME(relay_load_socket_total),
+ .help = "Total number of sockets",
+ .fill_fn = fill_socket_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
+ .help = "Total number of global connection bucket limit reached",
+ .fill_fn = fill_global_bw_limit_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_DNS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_exit_dns_query_total),
+ .help = "Total number of DNS queries done by this relay",
+ .fill_fn = fill_dns_query_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_DNS_ERRORS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_exit_dns_error_total),
+ .help = "Total number of DNS errors encountered by this relay",
+ .fill_fn = fill_dns_error_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
+ .help = "Total number of times we ran out of TCP ports",
+ .fill_fn = fill_tcp_exhaustion_values,
+ },
+};
+static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
+
+/** The only and single store of all the relay metrics. */
+static metrics_store_t *the_store;
+
+/** Helper function to convert an handshake type into a string. */
+static inline const char *
+handshake_type_to_str(const uint16_t type)
+{
+ switch (type) {
+ case ONION_HANDSHAKE_TYPE_TAP:
+ return "tap";
+ case ONION_HANDSHAKE_TYPE_FAST:
+ return "fast";
+ case ONION_HANDSHAKE_TYPE_NTOR:
+ return "ntor";
+ default:
+ // LCOV_EXCL_START
+ tor_assert_unreached();
+ // LCOV_EXCL_STOP
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
+static void
+fill_tcp_exhaustion_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
+}
+
+/* NOTE: Disable the record type label until libevent is fixed. */
+#if 0
+/** Helper array containing mapping for the name of the different DNS records
+ * and their corresponding libevent values. */
+static struct dns_type {
+ const char *name;
+ uint8_t type;
+} dns_types[] = {
+ { .name = "A", .type = DNS_IPv4_A },
+ { .name = "PTR", .type = DNS_PTR },
+ { .name = "AAAA", .type = DNS_IPv6_AAAA },
+};
+static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
+#endif
+
+/** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
+static void
+fill_dns_error_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
+
+ /* Helper array to map libeven DNS errors to their names and so we can
+ * iterate over this array to add all metrics. */
+ static struct dns_error {
+ const char *name;
+ uint8_t key;
+ } errors[] = {
+ { .name = "success", .key = DNS_ERR_NONE },
+ { .name = "format", .key = DNS_ERR_FORMAT },
+ { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
+ { .name = "notexist", .key = DNS_ERR_NOTEXIST },
+ { .name = "notimpl", .key = DNS_ERR_NOTIMPL },
+ { .name = "refused", .key = DNS_ERR_REFUSED },
+ { .name = "truncated", .key = DNS_ERR_TRUNCATED },
+ { .name = "unknown", .key = DNS_ERR_UNKNOWN },
+ { .name = "tor_timeout", .key = DNS_ERR_TIMEOUT },
+ { .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
+ { .name = "cancel", .key = DNS_ERR_CANCEL },
+ { .name = "nodata", .key = DNS_ERR_NODATA },
+ };
+ static const size_t num_errors = ARRAY_LENGTH(errors);
+
+ /* NOTE: Disable the record type label until libevent is fixed. */
+#if 0
+ for (size_t i = 0; i < num_dns_types; i++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *record_label =
+ tor_strdup(metrics_format_label("record", dns_types[i].name));
+
+ for (size_t j = 0; j < num_errors; j++) {
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, record_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("reason", errors[j].name));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
+ }
+ tor_free(record_label);
+ }
+#endif
+
+ /* Put in the DNS errors, unfortunately not per-type for now. */
+ for (size_t j = 0; j < num_errors; j++) {
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("reason", errors[j].name));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_error(0, errors[j].key));
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
+static void
+fill_dns_query_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_DNS];
+
+ /* NOTE: Disable the record type label until libevent is fixed (#40490). */
+#if 0
+ for (size_t i = 0; i < num_dns_types; i++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *record_label =
+ tor_strdup(metrics_format_label("record", dns_types[i].name));
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, record_label);
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_request(dns_types[i].type));
+ tor_free(record_label);
+ }
+#endif
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0));
+}
+
+/** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
+static void
+fill_global_bw_limit_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("side", "read"));
+ metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("side", "write"));
+ metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
+}
+
+/** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
+static void
+fill_socket_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_SOCKETS];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("state", "opened"));
+ metrics_store_entry_update(sentry, get_n_open_sockets());
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, get_max_sockets());
+}
+
+/** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
+static void
+fill_onionskins_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
+
+ for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *type_label =
+ tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, type_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("action", "processed"));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_circuit_n_handshake_assigned(t));
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, type_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("action", "dropped"));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_circuit_n_handshake_dropped(t));
+ tor_free(type_label);
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
+static void
+fill_oom_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "cell"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "dns"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "geoip"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "hsdir"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
+}
+
+/** Reset the global store and fill it with all the metrics from base_metrics
+ * and their associated values.
+ *
+ * To pull this off, every metrics has a "fill" function that is called and in
+ * charge of adding the metrics to the store, appropriate labels and finally
+ * updating the value to report. */
+static void
+fill_store(void)
+{
+ /* Reset the current store, we are about to fill it with all the things. */
+ metrics_store_reset(the_store);
+
+ /* Call the fill function for each metrics. */
+ for (size_t i = 0; i < num_base_metrics; i++) {
+ if (BUG(!base_metrics[i].fill_fn)) {
+ continue;
+ }
+ base_metrics[i].fill_fn();
+ }
+}
+
+/** Return a list of all the relay metrics stores. This is the
+ * function attached to the .get_metrics() member of the subsys_t. */
+const smartlist_t *
+relay_metrics_get_stores(void)
+{
+ /* We can't have the caller to free the returned list so keep it static,
+ * simply update it. */
+ static smartlist_t *stores_list = NULL;
+
+ /* We dynamically fill the store with all the metrics upon a request. The
+ * reason for this is because the exposed metrics of a relay are often
+ * internal counters in the fast path and thus we fetch the value when a
+ * metrics port request arrives instead of keeping a local metrics store of
+ * those values. */
+ fill_store();
+
+ if (!stores_list) {
+ stores_list = smartlist_new();
+ smartlist_add(stores_list, the_store);
+ }
+
+ return stores_list;
+}
+
+/** Initialize the relay metrics. */
+void
+relay_metrics_init(void)
+{
+ if (BUG(the_store)) {
+ return;
+ }
+ the_store = metrics_store_new();
+}
+
+/** Free the relay metrics. */
+void
+relay_metrics_free(void)
+{
+ if (!the_store) {
+ return;
+ }
+ /* NULL is set with this call. */
+ metrics_store_free(the_store);
+}
diff --git a/src/feature/relay/relay_metrics.h b/src/feature/relay/relay_metrics.h
new file mode 100644
index 0000000000..00dfeaa624
--- /dev/null
+++ b/src/feature/relay/relay_metrics.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2021, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * @file relay_metrics.h
+ * @brief Header for feature/relay/relay_metrics.c
+ **/
+
+#ifndef TOR_FEATURE_RELAY_RELAY_METRICS_H
+#define TOR_FEATURE_RELAY_RELAY_METRICS_H
+
+#include "lib/container/smartlist.h"
+#include "lib/metrics/metrics_common.h"
+
+/** Metrics key for each reported metrics. This key is also used as an index in
+ * the base_metrics array. */
+typedef enum {
+ /** Number of OOM invocation. */
+ RELAY_METRICS_NUM_OOM_BYTES = 0,
+ /** Number of onionskines handled. */
+ RELAY_METRICS_NUM_ONIONSKINS = 1,
+ /** Number of sockets. */
+ RELAY_METRICS_NUM_SOCKETS = 2,
+ /** Number of global connection rate limit. */
+ RELAY_METRICS_NUM_GLOBAL_RW_LIMIT = 3,
+ /** Number of DNS queries. */
+ RELAY_METRICS_NUM_DNS = 4,
+ /** Number of DNS query errors. */
+ RELAY_METRICS_NUM_DNS_ERRORS = 5,
+ /** Number of TCP exhaustion reached. */
+ RELAY_METRICS_NUM_TCP_EXHAUSTION = 6,
+} relay_metrics_key_t;
+
+/** The metadata of a relay metric. */
+typedef struct relay_metrics_entry_t {
+ /* Metric key used as a static array index. */
+ relay_metrics_key_t key;
+ /* Metric type. */
+ metrics_type_t type;
+ /* Metrics output name. */
+ const char *name;
+ /* Metrics output help comment. */
+ const char *help;
+ /* Update value function. */
+ void (*fill_fn)(void);
+} relay_metrics_entry_t;
+
+/* Init. */
+void relay_metrics_init(void);
+void relay_metrics_free(void);
+
+/* Accessors. */
+const smartlist_t *relay_metrics_get_stores(void);
+
+#endif /* !defined(TOR_FEATURE_RELAY_RELAY_METRICS_H) */
diff --git a/src/feature/relay/relay_periodic.c b/src/feature/relay/relay_periodic.c
index ee94590e01..dd9be4e36f 100644
--- a/src/feature/relay/relay_periodic.c
+++ b/src/feature/relay/relay_periodic.c
@@ -219,7 +219,7 @@ reachability_warnings_callback(time_t now, const or_options_t *options)
tor_asprintf(&where4, "%s:%d", address4, me->ipv4_orport);
if (!v6_ok)
tor_asprintf(&where6, "[%s]:%d", address6, me->ipv6_orport);
- const char *opt_and = (!v4_ok && !v6_ok) ? "and" : "";
+ const char *opt_and = (!v4_ok && !v6_ok) ? " and " : "";
/* IPv4 reachability test worked but not the IPv6. We will _not_
* publish the descriptor if our IPv6 was configured. We will if it
diff --git a/src/feature/relay/relay_sys.c b/src/feature/relay/relay_sys.c
index 25fc0bbd32..9c43734b84 100644
--- a/src/feature/relay/relay_sys.c
+++ b/src/feature/relay/relay_sys.c
@@ -14,6 +14,7 @@
#include "feature/relay/dns.h"
#include "feature/relay/ext_orport.h"
+#include "feature/relay/relay_metrics.h"
#include "feature/relay/onion_queue.h"
#include "feature/relay/relay_periodic.h"
#include "feature/relay/relay_sys.h"
@@ -25,6 +26,7 @@
static int
subsys_relay_initialize(void)
{
+ relay_metrics_init();
relay_register_periodic_events();
return 0;
}
@@ -37,6 +39,7 @@ subsys_relay_shutdown(void)
clear_pending_onions();
routerkeys_free_all();
router_free_all();
+ relay_metrics_free();
}
const struct subsys_fns_t sys_relay = {
@@ -46,4 +49,6 @@ const struct subsys_fns_t sys_relay = {
.level = RELAY_SUBSYS_LEVEL,
.initialize = subsys_relay_initialize,
.shutdown = subsys_relay_shutdown,
+
+ .get_metrics = relay_metrics_get_stores,
};