aboutsummaryrefslogtreecommitdiff
path: root/src/feature/relay/relay_metrics.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/feature/relay/relay_metrics.c')
-rw-r--r--src/feature/relay/relay_metrics.c401
1 files changed, 401 insertions, 0 deletions
diff --git a/src/feature/relay/relay_metrics.c b/src/feature/relay/relay_metrics.c
new file mode 100644
index 0000000000..fc8eb10d1b
--- /dev/null
+++ b/src/feature/relay/relay_metrics.c
@@ -0,0 +1,401 @@
+/* Copyright (c) 2021, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * @file relay_metrics.c
+ * @brief Relay metrics exposed through the MetricsPort
+ **/
+
+#define RELAY_METRICS_ENTRY_PRIVATE
+
+#include "orconfig.h"
+
+#include "core/or/or.h"
+#include "core/or/relay.h"
+
+#include "lib/malloc/malloc.h"
+#include "lib/container/smartlist.h"
+#include "lib/metrics/metrics_store.h"
+#include "lib/log/util_bug.h"
+
+#include "feature/relay/relay_metrics.h"
+#include "feature/stats/rephist.h"
+
+#include <event2/dns.h>
+
+/** Declarations of each fill function for metrics defined in base_metrics. */
+static void fill_dns_error_values(void);
+static void fill_dns_query_values(void);
+static void fill_global_bw_limit_values(void);
+static void fill_socket_values(void);
+static void fill_onionskins_values(void);
+static void fill_oom_values(void);
+static void fill_tcp_exhaustion_values(void);
+
+/** The base metrics that is a static array of metrics added to the metrics
+ * store.
+ *
+ * The key member MUST be also the index of the entry in the array. */
+static const relay_metrics_entry_t base_metrics[] =
+{
+ {
+ .key = RELAY_METRICS_NUM_OOM_BYTES,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_oom_bytes_total),
+ .help = "Total number of bytes the OOM has freed by subsystem",
+ .fill_fn = fill_oom_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_ONIONSKINS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_onionskins_total),
+ .help = "Total number of onionskins handled",
+ .fill_fn = fill_onionskins_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_SOCKETS,
+ .type = METRICS_TYPE_GAUGE,
+ .name = METRICS_NAME(relay_load_socket_total),
+ .help = "Total number of sockets",
+ .fill_fn = fill_socket_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_GLOBAL_RW_LIMIT,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_global_rate_limit_reached_total),
+ .help = "Total number of global connection bucket limit reached",
+ .fill_fn = fill_global_bw_limit_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_DNS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_exit_dns_query_total),
+ .help = "Total number of DNS queries done by this relay",
+ .fill_fn = fill_dns_query_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_DNS_ERRORS,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_exit_dns_error_total),
+ .help = "Total number of DNS errors encountered by this relay",
+ .fill_fn = fill_dns_error_values,
+ },
+ {
+ .key = RELAY_METRICS_NUM_TCP_EXHAUSTION,
+ .type = METRICS_TYPE_COUNTER,
+ .name = METRICS_NAME(relay_load_tcp_exhaustion_total),
+ .help = "Total number of times we ran out of TCP ports",
+ .fill_fn = fill_tcp_exhaustion_values,
+ },
+};
+static const size_t num_base_metrics = ARRAY_LENGTH(base_metrics);
+
+/** The only and single store of all the relay metrics. */
+static metrics_store_t *the_store;
+
+/** Helper function to convert an handshake type into a string. */
+static inline const char *
+handshake_type_to_str(const uint16_t type)
+{
+ switch (type) {
+ case ONION_HANDSHAKE_TYPE_TAP:
+ return "tap";
+ case ONION_HANDSHAKE_TYPE_FAST:
+ return "fast";
+ case ONION_HANDSHAKE_TYPE_NTOR:
+ return "ntor";
+ default:
+ // LCOV_EXCL_START
+ tor_assert_unreached();
+ // LCOV_EXCL_STOP
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
+static void
+fill_tcp_exhaustion_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_TCP_EXHAUSTION];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, rep_hist_get_n_tcp_exhaustion());
+}
+
+/* NOTE: Disable the record type label until libevent is fixed. */
+#if 0
+/** Helper array containing mapping for the name of the different DNS records
+ * and their corresponding libevent values. */
+static struct dns_type {
+ const char *name;
+ uint8_t type;
+} dns_types[] = {
+ { .name = "A", .type = DNS_IPv4_A },
+ { .name = "PTR", .type = DNS_PTR },
+ { .name = "AAAA", .type = DNS_IPv6_AAAA },
+};
+static const size_t num_dns_types = ARRAY_LENGTH(dns_types);
+#endif
+
+/** Fill function for the RELAY_METRICS_NUM_DNS_ERRORS metrics. */
+static void
+fill_dns_error_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_DNS_ERRORS];
+
+ /* Helper array to map libeven DNS errors to their names and so we can
+ * iterate over this array to add all metrics. */
+ static struct dns_error {
+ const char *name;
+ uint8_t key;
+ } errors[] = {
+ { .name = "success", .key = DNS_ERR_NONE },
+ { .name = "format", .key = DNS_ERR_FORMAT },
+ { .name = "serverfailed", .key = DNS_ERR_SERVERFAILED },
+ { .name = "notexist", .key = DNS_ERR_NOTEXIST },
+ { .name = "notimpl", .key = DNS_ERR_NOTIMPL },
+ { .name = "refused", .key = DNS_ERR_REFUSED },
+ { .name = "truncated", .key = DNS_ERR_TRUNCATED },
+ { .name = "unknown", .key = DNS_ERR_UNKNOWN },
+ { .name = "tor_timeout", .key = DNS_ERR_TIMEOUT },
+ { .name = "shutdown", .key = DNS_ERR_SHUTDOWN },
+ { .name = "cancel", .key = DNS_ERR_CANCEL },
+ { .name = "nodata", .key = DNS_ERR_NODATA },
+ };
+ static const size_t num_errors = ARRAY_LENGTH(errors);
+
+ /* NOTE: Disable the record type label until libevent is fixed. */
+#if 0
+ for (size_t i = 0; i < num_dns_types; i++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *record_label =
+ tor_strdup(metrics_format_label("record", dns_types[i].name));
+
+ for (size_t j = 0; j < num_errors; j++) {
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, record_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("reason", errors[j].name));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_error(dns_types[i].type, errors[j].key));
+ }
+ tor_free(record_label);
+ }
+#endif
+
+ /* Put in the DNS errors, unfortunately not per-type for now. */
+ for (size_t j = 0; j < num_errors; j++) {
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("reason", errors[j].name));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_error(0, errors[j].key));
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_DNS metrics. */
+static void
+fill_dns_query_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_DNS];
+
+ /* NOTE: Disable the record type label until libevent is fixed (#40490). */
+#if 0
+ for (size_t i = 0; i < num_dns_types; i++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *record_label =
+ tor_strdup(metrics_format_label("record", dns_types[i].name));
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, record_label);
+ metrics_store_entry_update(sentry,
+ rep_hist_get_n_dns_request(dns_types[i].type));
+ tor_free(record_label);
+ }
+#endif
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, rep_hist_get_n_dns_request(0));
+}
+
+/** Fill function for the RELAY_METRICS_NUM_GLOBAL_RW_LIMIT metrics. */
+static void
+fill_global_bw_limit_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_GLOBAL_RW_LIMIT];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("side", "read"));
+ metrics_store_entry_update(sentry, rep_hist_get_n_read_limit_reached());
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("side", "write"));
+ metrics_store_entry_update(sentry, rep_hist_get_n_write_limit_reached());
+}
+
+/** Fill function for the RELAY_METRICS_NUM_SOCKETS metrics. */
+static void
+fill_socket_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_SOCKETS];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("state", "opened"));
+ metrics_store_entry_update(sentry, get_n_open_sockets());
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_update(sentry, get_max_sockets());
+}
+
+/** Fill function for the RELAY_METRICS_NUM_ONIONSKINS metrics. */
+static void
+fill_onionskins_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_ONIONSKINS];
+
+ for (uint16_t t = 0; t <= MAX_ONION_HANDSHAKE_TYPE; t++) {
+ /* Dup the label because metrics_format_label() returns a pointer to a
+ * string on the stack and we need that label for all metrics. */
+ char *type_label =
+ tor_strdup(metrics_format_label("type", handshake_type_to_str(t)));
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, type_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("action", "processed"));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_circuit_n_handshake_assigned(t));
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry, type_label);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("action", "dropped"));
+ metrics_store_entry_update(sentry,
+ rep_hist_get_circuit_n_handshake_dropped(t));
+ tor_free(type_label);
+ }
+}
+
+/** Fill function for the RELAY_METRICS_NUM_OOM_BYTES metrics. */
+static void
+fill_oom_values(void)
+{
+ metrics_store_entry_t *sentry;
+ const relay_metrics_entry_t *rentry =
+ &base_metrics[RELAY_METRICS_NUM_OOM_BYTES];
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "cell"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_cell);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "dns"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_dns);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "geoip"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_geoip);
+
+ sentry = metrics_store_add(the_store, rentry->type, rentry->name,
+ rentry->help);
+ metrics_store_entry_add_label(sentry,
+ metrics_format_label("subsys", "hsdir"));
+ metrics_store_entry_update(sentry, oom_stats_n_bytes_removed_hsdir);
+}
+
+/** Reset the global store and fill it with all the metrics from base_metrics
+ * and their associated values.
+ *
+ * To pull this off, every metrics has a "fill" function that is called and in
+ * charge of adding the metrics to the store, appropriate labels and finally
+ * updating the value to report. */
+static void
+fill_store(void)
+{
+ /* Reset the current store, we are about to fill it with all the things. */
+ metrics_store_reset(the_store);
+
+ /* Call the fill function for each metrics. */
+ for (size_t i = 0; i < num_base_metrics; i++) {
+ if (BUG(!base_metrics[i].fill_fn)) {
+ continue;
+ }
+ base_metrics[i].fill_fn();
+ }
+}
+
+/** Return a list of all the relay metrics stores. This is the
+ * function attached to the .get_metrics() member of the subsys_t. */
+const smartlist_t *
+relay_metrics_get_stores(void)
+{
+ /* We can't have the caller to free the returned list so keep it static,
+ * simply update it. */
+ static smartlist_t *stores_list = NULL;
+
+ /* We dynamically fill the store with all the metrics upon a request. The
+ * reason for this is because the exposed metrics of a relay are often
+ * internal counters in the fast path and thus we fetch the value when a
+ * metrics port request arrives instead of keeping a local metrics store of
+ * those values. */
+ fill_store();
+
+ if (!stores_list) {
+ stores_list = smartlist_new();
+ smartlist_add(stores_list, the_store);
+ }
+
+ return stores_list;
+}
+
+/** Initialize the relay metrics. */
+void
+relay_metrics_init(void)
+{
+ if (BUG(the_store)) {
+ return;
+ }
+ the_store = metrics_store_new();
+}
+
+/** Free the relay metrics. */
+void
+relay_metrics_free(void)
+{
+ if (!the_store) {
+ return;
+ }
+ /* NULL is set with this call. */
+ metrics_store_free(the_store);
+}