1 files changed, 643 insertions, 99 deletions
diff --git a/src/feature/stats/rephist.c b/src/feature/stats/rephist.c
index 3c22fda3b8..ae4420ae50 100644
--- a/src/feature/stats/rephist.c
+++ b/src/feature/stats/rephist.c
@@ -1,5 +1,5 @@
 /* Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
- * Copyright (c) 2007-2020, The Tor Project, Inc. */
+ * Copyright (c) 2007-2021, The Tor Project, Inc. */
 /* See LICENSE for licensing information */
 
 /**
@@ -84,6 +84,8 @@
 #include "feature/nodelist/networkstatus_st.h"
 #include "core/or/or_circuit_st.h"
 
+#include <event2/dns.h>
+
 #ifdef HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
@@ -183,6 +185,244 @@ static time_t started_tracking_stability = 0;
 /** Map from hex OR identity digest to or_history_t. */
 static digestmap_t *history_map = NULL;
 
+/** Represents a state of overload stats.
+ *
+ *  All the timestamps in this structure have already been rounded down to the
+ *  nearest hour. */
+typedef struct {
+  /* When did we last experience a general overload? */
+  time_t overload_general_time;
+
+  /* When did we last experience a bandwidth-related overload? */
+  time_t overload_ratelimits_time;
+  /* How many times have we gone off the our read limits? */
+  uint64_t overload_read_count;
+  /* How many times have we gone off the our write limits? */
+  uint64_t overload_write_count;
+
+  /* When did we last experience a file descriptor exhaustion? */
+  time_t overload_fd_exhausted_time;
+  /* How many times have we experienced a file descriptor exhaustion? */
+  uint64_t overload_fd_exhausted;
+} overload_stats_t;
+
+/***** DNS statistics *****/
+
+/** Represents the statistics of DNS queries seen if it is an Exit. */
+typedef struct {
+  /** Total number of DNS request seen at an Exit. They might not all end
+   * successfully or might even be lost by tor. This counter is incremented
+   * right before the DNS request is initiated. */
+  uint64_t stats_n_request;
+
+  /** Total number of DNS timeout errors. */
+  uint64_t stats_n_error_timeout;
+
+  /** When is the next assessment time of the general overload for DNS errors.
+   * Once this time is reached, all stats are reset and this time is set to the
+   * next assessment time. */
+  time_t next_assessment_time;
+} overload_dns_stats_t;
+
+/** Keep track of the DNS requests for the general overload state. */
+static overload_dns_stats_t overload_dns_stats;
+
+/* We use a scale here so we can represent percentages with decimal points by
+ * scaling the value by this factor and so 0.5% becomes a value of 500.
+ * Default is 1% and thus min and max range is 0 to 100%. */
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE 1000.0
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT 1000
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_MIN 0
+#define OVERLOAD_DNS_TIMEOUT_PERCENT_MAX 100000
+
+/** Consensus parameter: indicate what fraction of DNS timeout errors over the
+ * total number of DNS requests must be reached before we trigger a general
+ * overload signal .*/
+static double overload_dns_timeout_fraction =
+   OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT /
+   OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
+
+/* Number of seconds for the assessment period. Default is 10 minutes (600) and
+ * the min max range is within a 32bit value. */
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT (10 * 60)
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN 0
+#define OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX INT32_MAX
+
+/** Consensus parameter: Period, in seconds, over which we count the number of
+ * DNS requests and timeout errors. After that period, we assess if we trigger
+ * an overload or not. */
+static int32_t overload_dns_timeout_period_secs =
+  OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT;
+
+/** Current state of overload stats */
+static overload_stats_t overload_stats;
+
+/** Return true if this overload happened within the last `n_hours`. */
+static bool
+overload_happened_recently(time_t overload_time, int n_hours)
+{
+  /* An overload is relevant if it happened in the last 72 hours */
+  if (overload_time > approx_time() - 3600 * n_hours) {
+    return true;
+  }
+  return false;
+}
+
+/** Assess the DNS timeout errors and if we have enough to trigger a general
+ * overload. */
+static void
+overload_general_dns_assessment(void)
+{
+  /* Initialize the time. Should be done once. */
+  if (overload_dns_stats.next_assessment_time == 0) {
+    goto reset;
+  }
+
+  /* Not the time yet. */
+  if (overload_dns_stats.next_assessment_time > approx_time()) {
+    return;
+  }
+
+ reset:
+  /* Reset counters for the next period. */
+  overload_dns_stats.stats_n_error_timeout = 0;
+  overload_dns_stats.stats_n_request = 0;
+  overload_dns_stats.next_assessment_time =
+    approx_time() + overload_dns_timeout_period_secs;
+}
+
+/** Note a DNS error for the given given libevent DNS record type and error
+ * code. Possible types are: DNS_IPv4_A, DNS_PTR, DNS_IPv6_AAAA.
+ *
+ * IMPORTANT: Libevent is _not_ returning the type in case of an error and so
+ * if error is anything but DNS_ERR_NONE, the type is not usable and set to 0.
+ *
+ * See: https://gitlab.torproject.org/tpo/core/tor/-/issues/40490 */
+void
+rep_hist_note_dns_query(int type, uint8_t error)
+{
+  (void) type;
+
+  /* Assess if we need to trigger a general overload with regards to the DNS
+   * errors or not. */
+  overload_general_dns_assessment();
+
+  /* We only care about timeouts for the moment. */
+  switch (error) {
+  case DNS_ERR_TIMEOUT:
+    overload_dns_stats.stats_n_error_timeout++;
+    break;
+  default:
+    break;
+  }
+
+  /* Increment total number of requests. */
+  overload_dns_stats.stats_n_request++;
+}
+
+/* The current version of the overload stats version */
+#define OVERLOAD_STATS_VERSION 1
+
+/** Returns an allocated string for server descriptor for publising information
+ * on whether we are overloaded or not. */
+char *
+rep_hist_get_overload_general_line(void)
+{
+  char *result = NULL;
+  char tbuf[ISO_TIME_LEN+1];
+
+  /* Encode the general overload */
+  if (overload_happened_recently(overload_stats.overload_general_time, 72)) {
+    format_iso_time(tbuf, overload_stats.overload_general_time);
+    tor_asprintf(&result, "overload-general %d %s\n",
+                 OVERLOAD_STATS_VERSION, tbuf);
+  }
+
+  return result;
+}
+
+/** Returns an allocated string for extra-info documents for publishing
+ *  overload statistics. */
+char *
+rep_hist_get_overload_stats_lines(void)
+{
+  char *result = NULL;
+  smartlist_t *chunks = smartlist_new();
+  char tbuf[ISO_TIME_LEN+1];
+
+  /* Add bandwidth-related overloads */
+  if (overload_happened_recently(overload_stats.overload_ratelimits_time,24)) {
+    const or_options_t *options = get_options();
+    format_iso_time(tbuf, overload_stats.overload_ratelimits_time);
+    smartlist_add_asprintf(chunks,
+                           "overload-ratelimits %d %s %" PRIu64 " %" PRIu64
+                           " %" PRIu64 " %" PRIu64 "\n",
+                           OVERLOAD_STATS_VERSION, tbuf,
+                           options->BandwidthRate, options->BandwidthBurst,
+                           overload_stats.overload_read_count,
+                           overload_stats.overload_write_count);
+  }
+
+  /* Finally file descriptor overloads */
+  if (overload_happened_recently(
+                              overload_stats.overload_fd_exhausted_time, 72)) {
+    format_iso_time(tbuf, overload_stats.overload_fd_exhausted_time);
+    smartlist_add_asprintf(chunks, "overload-fd-exhausted %d %s\n",
+                           OVERLOAD_STATS_VERSION, tbuf);
+  }
+
+  /* Bail early if we had nothing to write */
+  if (smartlist_len(chunks) == 0) {
+    goto done;
+  }
+
+  result = smartlist_join_strings(chunks, "", 0, NULL);
+
+ done:
+  SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
+  smartlist_free(chunks);
+  return result;
+}
+
+/** Round down the time in `a` to the beginning of the current hour */
+#define SET_TO_START_OF_HOUR(a) STMT_BEGIN \
+  (a) = approx_time() - (approx_time() % 3600); \
+STMT_END
+
+/** Note down an overload event of type `overload`. */
+void
+rep_hist_note_overload(overload_type_t overload)
+{
+  static time_t last_read_counted = 0;
+  static time_t last_write_counted = 0;
+
+  switch (overload) {
+  case OVERLOAD_GENERAL:
+    SET_TO_START_OF_HOUR(overload_stats.overload_general_time);
+    break;
+  case OVERLOAD_READ: {
+    SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
+    if (approx_time() >= last_read_counted + 60) { /* Count once a minute */
+        overload_stats.overload_read_count++;
+        last_read_counted = approx_time();
+    }
+    break;
+  }
+  case OVERLOAD_WRITE: {
+    SET_TO_START_OF_HOUR(overload_stats.overload_ratelimits_time);
+    if (approx_time() >= last_write_counted + 60) { /* Count once a minute */
+      overload_stats.overload_write_count++;
+      last_write_counted = approx_time();
+    }
+    break;
+  }
+  case OVERLOAD_FD_EXHAUSTED:
+    SET_TO_START_OF_HOUR(overload_stats.overload_fd_exhausted_time);
+    overload_stats.overload_fd_exhausted++;
+    break;
+  }
+}
+
 /** Return the or_history_t for the OR with identity digest <b>id</b>,
  * creating it if necessary. */
 static or_history_t *
@@ -1654,12 +1894,144 @@ STATIC int onion_handshakes_requested[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
 STATIC int onion_handshakes_assigned[MAX_ONION_HANDSHAKE_TYPE+1] = {0};
 /**@}*/
 
+/** Counters keeping the same stats as above but for the entire duration of the
+ * process (not reset). */
+static uint64_t stats_n_onionskin_assigned[MAX_ONION_STAT_TYPE+1] = {0};
+static uint64_t stats_n_onionskin_dropped[MAX_ONION_STAT_TYPE+1] = {0};
+
+/* We use a scale here so we can represent percentages with decimal points by
+ * scaling the value by this factor and so 0.5% becomes a value of 500.
+ * Default is 1% and thus min and max range is 0 to 100%. */
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE 1000.0
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT 1000
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN 0
+#define OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX 100000
+
+/** Consensus parameter: indicate what fraction of ntor onionskin drop over the
+ * total number of requests must be reached before we trigger a general
+ * overload signal.*/
+static double overload_onionskin_ntor_fraction =
+   OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT /
+   OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
+
+/* Number of seconds for the assessment period. Default is 6 hours (21600) and
+ * the min max range is within a 32bit value. We align this period to the
+ * Heartbeat so the logs would match this period more or less. */
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT (60 * 60 * 6)
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN 0
+#define OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX INT32_MAX
+
+/** Consensus parameter: Period, in seconds, over which we count the number of
+ * ntor onionskins requests and how many were dropped. After that period, we
+ * assess if we trigger an overload or not. */
+static int32_t overload_onionskin_ntor_period_secs =
+  OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT;
+
+/** Structure containing information for an assessment period of the onionskin
+ * drop overload general signal.
+ *
+ * It is used to track, within a time period, how many requests we've gotten
+ * and how many were dropped. The overload general signal is decided from these
+ * depending on some consensus parameters. */
+typedef struct {
+  /** Total number of ntor onionskin requested for an assessment period. */
+  uint64_t n_ntor_requested;
+
+  /** Total number of dropped ntor onionskins for an assessment period. */
+  uint64_t n_ntor_dropped;
+
+  /** When is the next assessment time of the general overload for ntor
+   * onionskin drop. Once this time is reached, all stats are reset and this
+   * time is set to the next assessment time. */
+  time_t next_assessment_time;
+} overload_onionskin_assessment_t;
+
+/** Keep track of the onionskin requests for an assessment period. */
+static overload_onionskin_assessment_t overload_onionskin_assessment;
+
+/**
+ * We combine ntorv3 and ntor into the same stat, so we must
+ * use this function to covert the cell type to a stat index.
+ */
+static inline uint16_t
+onionskin_type_to_stat(uint16_t type)
+{
+  if (BUG(type > MAX_ONION_STAT_TYPE)) {
+    return MAX_ONION_STAT_TYPE; // use ntor if out of range
+  }
+
+  return type;
+}
+
+/** Assess our ntor handshake statistics and decide if we need to emit a
+ * general overload signal.
+ *
+ * Regardless of overloaded or not, if the assessment time period has passed,
+ * the stats are reset back to 0 and the assessment time period updated.
+ *
+ * This is called when a ntor handshake is _requested_ because we want to avoid
+ * to have an assymetric situation where requested counter is reset to 0 but
+ * then a drop happens leading to the drop counter being incremented while the
+ * requested counter is 0. */
+static void
+overload_general_onionskin_assessment(void)
+{
+  /* Initialize the time. Should be done once. */
+  if (overload_onionskin_assessment.next_assessment_time == 0) {
+    goto reset;
+  }
+
+  /* Not the time yet. */
+  if (overload_onionskin_assessment.next_assessment_time > approx_time()) {
+    goto done;
+  }
+
+  /* Make sure we have enough requests to be able to make a proper assessment.
+   * We want to avoid 1 single request/drop to trigger an overload as we want
+   * at least the number of requests to be above the scale of our fraction. */
+  if (overload_onionskin_assessment.n_ntor_requested <
+      OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE) {
+    goto done;
+  }
+
+  /* Lets see if we can signal a general overload. */
+  double fraction = (double) overload_onionskin_assessment.n_ntor_dropped /
+                    (double) overload_onionskin_assessment.n_ntor_requested;
+  if (fraction >= overload_onionskin_ntor_fraction) {
+    log_notice(LD_HIST, "General overload -> Ntor dropped (%" PRIu64 ") "
+               "fraction %.4f%% is above threshold of %.4f%%",
+               overload_onionskin_assessment.n_ntor_dropped,
+               fraction * 100.0,
+               overload_onionskin_ntor_fraction * 100.0);
+    rep_hist_note_overload(OVERLOAD_GENERAL);
+  }
+
+ reset:
+  /* Reset counters for the next period. */
+  overload_onionskin_assessment.n_ntor_dropped = 0;
+  overload_onionskin_assessment.n_ntor_requested = 0;
+  overload_onionskin_assessment.next_assessment_time =
+    approx_time() + overload_onionskin_ntor_period_secs;
+
+ done:
+  return;
+}
+
 /** A new onionskin (using the <b>type</b> handshake) has arrived. */
 void
 rep_hist_note_circuit_handshake_requested(uint16_t type)
 {
-  if (type <= MAX_ONION_HANDSHAKE_TYPE)
-    onion_handshakes_requested[type]++;
+  uint16_t stat = onionskin_type_to_stat(type);
+
+  onion_handshakes_requested[stat]++;
+
+  /* Only relays get to record requested onionskins. */
+  if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
+    /* Assess if we've reached the overload general signal. */
+    overload_general_onionskin_assessment();
+
+    overload_onionskin_assessment.n_ntor_requested++;
+  }
 }
 
 /** We've sent an onionskin (using the <b>type</b> handshake) to a
@@ -1667,8 +2039,24 @@ rep_hist_note_circuit_handshake_requested(uint16_t type)
 void
 rep_hist_note_circuit_handshake_assigned(uint16_t type)
 {
-  if (type <= MAX_ONION_HANDSHAKE_TYPE)
-    onion_handshakes_assigned[type]++;
+  onion_handshakes_assigned[onionskin_type_to_stat(type)]++;
+  stats_n_onionskin_assigned[onionskin_type_to_stat(type)]++;
+}
+
+/** We've just drop an onionskin (using the <b>type</b> handshake) due to being
+ * overloaded. */
+void
+rep_hist_note_circuit_handshake_dropped(uint16_t type)
+{
+  uint16_t stat = onionskin_type_to_stat(type);
+
+  stats_n_onionskin_dropped[stat]++;
+
+  /* Only relays get to record requested onionskins. */
+  if (stat == ONION_HANDSHAKE_TYPE_NTOR) {
+    /* Note the dropped ntor in the overload assessment object. */
+    overload_onionskin_assessment.n_ntor_dropped++;
+  }
 }
 
 /** Get the circuit handshake value that is requested. */
@@ -1710,123 +2098,214 @@ rep_hist_log_circuit_handshake_stats(time_t now)
 
 /** Start of the current hidden service stats interval or 0 if we're
  * not collecting hidden service statistics. */
-static time_t start_of_hs_stats_interval;
+static time_t start_of_hs_v2_stats_interval;
 
-/** Carries the various hidden service statistics, and any other
- *  information needed. */
-typedef struct hs_stats_t {
-  /** How many relay cells have we seen as rendezvous points? */
-  uint64_t rp_relay_cells_seen;
+/** Our v2 statistics structure singleton. */
+static hs_v2_stats_t *hs_v2_stats = NULL;
 
-  /** Set of unique public key digests we've seen this stat period
-   * (could also be implemented as sorted smartlist). */
-  digestmap_t *onions_seen_this_period;
-} hs_stats_t;
+/** HSv2 stats */
 
-/** Our statistics structure singleton. */
-static hs_stats_t *hs_stats = NULL;
-
-/** Allocate, initialize and return an hs_stats_t structure. */
-static hs_stats_t *
-hs_stats_new(void)
+/** Allocate, initialize and return an hs_v2_stats_t structure. */
+static hs_v2_stats_t *
+hs_v2_stats_new(void)
 {
-  hs_stats_t *new_hs_stats = tor_malloc_zero(sizeof(hs_stats_t));
-  new_hs_stats->onions_seen_this_period = digestmap_new();
+  hs_v2_stats_t *new_hs_v2_stats = tor_malloc_zero(sizeof(hs_v2_stats_t));
 
-  return new_hs_stats;
+  return new_hs_v2_stats;
 }
 
-#define hs_stats_free(val) \
-  FREE_AND_NULL(hs_stats_t, hs_stats_free_, (val))
+#define hs_v2_stats_free(val) \
+  FREE_AND_NULL(hs_v2_stats_t, hs_v2_stats_free_, (val))
 
-/** Free an hs_stats_t structure. */
+/** Free an hs_v2_stats_t structure. */
 static void
-hs_stats_free_(hs_stats_t *victim_hs_stats)
+hs_v2_stats_free_(hs_v2_stats_t *victim_hs_v2_stats)
 {
-  if (!victim_hs_stats) {
+  if (!victim_hs_v2_stats) {
     return;
   }
+  tor_free(victim_hs_v2_stats);
+}
+
+/** Clear history of hidden service statistics and set the measurement
+ * interval start to <b>now</b>. */
+static void
+rep_hist_reset_hs_v2_stats(time_t now)
+{
+  if (!hs_v2_stats) {
+    hs_v2_stats = hs_v2_stats_new();
+  }
 
-  digestmap_free(victim_hs_stats->onions_seen_this_period, NULL);
-  tor_free(victim_hs_stats);
+  hs_v2_stats->rp_v2_relay_cells_seen = 0;
+
+  start_of_hs_v2_stats_interval = now;
 }
 
-/** Initialize hidden service statistics. */
-void
-rep_hist_hs_stats_init(time_t now)
+/*** HSv3 stats ******/
+
+/** Start of the current hidden service stats interval or 0 if we're not
+ *  collecting hidden service statistics.
+ *
+ *  This is particularly important for v3 statistics since this variable
+ *  controls the start time of initial v3 stats collection. It's initialized by
+ *  rep_hist_hs_stats_init() to the next time period start (i.e. 12:00UTC), and
+ *  should_collect_v3_stats() ensures that functions that collect v3 stats do
+ *  not do so sooner than that.
+ *
+ *  Collecting stats from 12:00UTC to 12:00UTC is extremely important for v3
+ *  stats because rep_hist_hsdir_stored_maybe_new_v3_onion() uses the blinded
+ *  key of each onion service as its double-counting index. Onion services
+ *  rotate their descriptor at around 00:00UTC which means that their blinded
+ *  key also changes around that time. However the precise time that onion
+ *  services rotate their descriptors is actually when they fetch a new
+ *  00:00UTC consensus and that happens at a random time (e.g. it can even
+ *  happen at 02:00UTC). This means that if we started keeping v3 stats at
+ *  around 00:00UTC we wouldn't be able to tell when onion services change
+ *  their blinded key and hence we would double count an unpredictable amount
+ *  of them (for example, if an onion service fetches the 00:00UTC consensus at
+ *  01:00UTC it would upload to its old HSDir at 00:45UTC, and then to a
+ *  different HSDir at 01:50UTC).
+ *
+ *  For this reason, we start collecting statistics at 12:00UTC. This way we
+ *  know that by the time we stop collecting statistics for that time period 24
+ *  hours later, all the onion services have switched to their new blinded
+ *  key. This way we can predict much better how much double counting has been
+ *  performed.
+ */
+static time_t start_of_hs_v3_stats_interval;
+
+/** Our v3 statistics structure singleton. */
+static hs_v3_stats_t *hs_v3_stats = NULL;
+
+/** Allocate, initialize and return an hs_v3_stats_t structure. */
+static hs_v3_stats_t *
+hs_v3_stats_new(void)
+{
+  hs_v3_stats_t *new_hs_v3_stats = tor_malloc_zero(sizeof(hs_v3_stats_t));
+  new_hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
+
+  return new_hs_v3_stats;
+}
+
+#define hs_v3_stats_free(val) \
+  FREE_AND_NULL(hs_v3_stats_t, hs_v3_stats_free_, (val))
+
+/** Free an hs_v3_stats_t structure. */
+static void
+hs_v3_stats_free_(hs_v3_stats_t *victim_hs_v3_stats)
 {
-  if (!hs_stats) {
-    hs_stats = hs_stats_new();
+  if (!victim_hs_v3_stats) {
+    return;
   }
 
-  start_of_hs_stats_interval = now;
+  digest256map_free(victim_hs_v3_stats->v3_onions_seen_this_period, NULL);
+  tor_free(victim_hs_v3_stats);
 }
 
 /** Clear history of hidden service statistics and set the measurement
  * interval start to <b>now</b>. */
 static void
-rep_hist_reset_hs_stats(time_t now)
+rep_hist_reset_hs_v3_stats(time_t now)
 {
-  if (!hs_stats) {
-    hs_stats = hs_stats_new();
+  if (!hs_v3_stats) {
+    hs_v3_stats = hs_v3_stats_new();
   }
 
-  hs_stats->rp_relay_cells_seen = 0;
+  digest256map_free(hs_v3_stats->v3_onions_seen_this_period, NULL);
+  hs_v3_stats->v3_onions_seen_this_period = digest256map_new();
 
-  digestmap_free(hs_stats->onions_seen_this_period, NULL);
-  hs_stats->onions_seen_this_period = digestmap_new();
+  hs_v3_stats->rp_v3_relay_cells_seen = 0;
 
-  start_of_hs_stats_interval = now;
+  start_of_hs_v3_stats_interval = now;
 }
 
-/** Stop collecting hidden service stats in a way that we can re-start
- * doing so in rep_hist_buffer_stats_init(). */
-void
-rep_hist_hs_stats_term(void)
+/** Return true if it's a good time to collect v3 stats.
+ *
+ *  v3 stats have a strict stats collection period (from 12:00UTC to 12:00UTC
+ *  on the real network). We don't want to collect statistics if (for example)
+ *  we just booted and it's 03:00UTC; we will wait until 12:00UTC before we
+ *  start collecting statistics to make sure that the final result represents
+ *  the whole collection period. This behavior is controlled by
+ *  rep_hist_hs_stats_init().
+ */
+MOCK_IMPL(STATIC bool,
+should_collect_v3_stats,(void))
 {
-  rep_hist_reset_hs_stats(0);
+  return start_of_hs_v3_stats_interval <= approx_time();
 }
 
-/** We saw a new HS relay cell, Count it! */
+/** We just received a new descriptor with <b>blinded_key</b>. See if we've
+ * seen this blinded key before, and if not add it to the stats.  */
 void
-rep_hist_seen_new_rp_cell(void)
+rep_hist_hsdir_stored_maybe_new_v3_onion(const uint8_t *blinded_key)
 {
-  if (!hs_stats) {
-    return; // We're not collecting stats
+  /* Return early if we don't collect HSv3 stats, or if it's not yet the time
+   * to collect them. */
+  if (!hs_v3_stats || !should_collect_v3_stats()) {
+    return;
   }
 
-  hs_stats->rp_relay_cells_seen++;
+  bool seen_before =
+    !!digest256map_get(hs_v3_stats->v3_onions_seen_this_period,
+                       blinded_key);
+
+  log_info(LD_GENERAL, "Considering v3 descriptor with %s (%sseen before)",
+           safe_str(hex_str((char*)blinded_key, 32)),
+           seen_before ? "" : "not ");
+
+  /* Count it if we haven't seen it before. */
+  if (!seen_before) {
+    digest256map_set(hs_v3_stats->v3_onions_seen_this_period,
+                  blinded_key, (void*)(uintptr_t)1);
+  }
 }
 
-/** As HSDirs, we saw another hidden service with public key
- *  <b>pubkey</b>. Check whether we have counted it before, if not
- *  count it now! */
+/** We saw a new HS relay cell: count it!
+ *  If <b>is_v2</b> is set then it's a v2 RP cell, otherwise it's a v3. */
 void
-rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey)
+rep_hist_seen_new_rp_cell(bool is_v2)
 {
-  char pubkey_hash[DIGEST_LEN];
+  log_debug(LD_GENERAL, "New RP cell (%d)", is_v2);
 
-  if (!hs_stats) {
-    return; // We're not collecting stats
+  if (is_v2 && hs_v2_stats) {
+    hs_v2_stats->rp_v2_relay_cells_seen++;
+  } else if (!is_v2 && hs_v3_stats && should_collect_v3_stats()) {
+    hs_v3_stats->rp_v3_relay_cells_seen++;
   }
+}
 
-  /* Get the digest of the pubkey which will be used to detect whether
-     we've seen this hidden service before or not.  */
-  if (crypto_pk_get_digest(pubkey, pubkey_hash) < 0) {
-    /*  This fail should not happen; key has been validated by
-        descriptor parsing code first. */
-    return;
+/** Generic HS stats code */
+
+/** Initialize v2 and v3 hidden service statistics. */
+void
+rep_hist_hs_stats_init(time_t now)
+{
+  if (!hs_v2_stats) {
+    hs_v2_stats = hs_v2_stats_new();
   }
 
-  /* Check if this is the first time we've seen this hidden
-     service. If it is, count it as new. */
-  if (!digestmap_get(hs_stats->onions_seen_this_period,
-                     pubkey_hash)) {
-    digestmap_set(hs_stats->onions_seen_this_period,
-                  pubkey_hash, (void*)(uintptr_t)1);
+  /* Start collecting v2 stats straight away */
+  start_of_hs_v2_stats_interval = now;
+
+  if (!hs_v3_stats) {
+    hs_v3_stats = hs_v3_stats_new();
   }
+
+  /* Start collecting v3 stats at the next 12:00 UTC */
+  start_of_hs_v3_stats_interval = hs_get_start_time_of_next_time_period(now);
+}
+
+/** Stop collecting hidden service stats in a way that we can re-start
+ * doing so in rep_hist_buffer_stats_init(). */
+void
+rep_hist_hs_stats_term(void)
+{
+  rep_hist_reset_hs_v2_stats(0);
+  rep_hist_reset_hs_v3_stats(0);
 }
 
+/** Stats reporting code */
+
 /* The number of cells that are supposed to be hidden from the adversary
  * by adding noise from the Laplace distribution.  This value, divided by
  * EPSILON, is Laplace parameter b. It must be greater than 0. */
@@ -1851,58 +2330,68 @@ rep_hist_stored_maybe_new_hs(const crypto_pk_t *pubkey)
 #define ONIONS_SEEN_BIN_SIZE 8
 
 /** Allocate and return a string containing hidden service stats that
- *  are meant to be placed in the extra-info descriptor. */
-static char *
-rep_hist_format_hs_stats(time_t now)
+ *  are meant to be placed in the extra-info descriptor.
+ *
+ *  Function works for both v2 and v3 stats depending on <b>is_v3</b>. */
+STATIC char *
+rep_hist_format_hs_stats(time_t now, bool is_v3)
 {
   char t[ISO_TIME_LEN+1];
   char *hs_stats_string;
-  int64_t obfuscated_cells_seen;
-  int64_t obfuscated_onions_seen;
+  int64_t obfuscated_onions_seen, obfuscated_cells_seen;
+
+  uint64_t rp_cells_seen = is_v3 ?
+    hs_v3_stats->rp_v3_relay_cells_seen : hs_v2_stats->rp_v2_relay_cells_seen;
+  size_t onions_seen = is_v3 ?
+    digest256map_size(hs_v3_stats->v3_onions_seen_this_period) : 0;
+  time_t start_of_hs_stats_interval = is_v3 ?
+    start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
 
   uint64_t rounded_cells_seen
-    = round_uint64_to_next_multiple_of(hs_stats->rp_relay_cells_seen,
-                                       REND_CELLS_BIN_SIZE);
+    = round_uint64_to_next_multiple_of(rp_cells_seen, REND_CELLS_BIN_SIZE);
   rounded_cells_seen = MIN(rounded_cells_seen, INT64_MAX);
   obfuscated_cells_seen = add_laplace_noise((int64_t)rounded_cells_seen,
                           crypto_rand_double(),
                           REND_CELLS_DELTA_F, REND_CELLS_EPSILON);
 
   uint64_t rounded_onions_seen =
-    round_uint64_to_next_multiple_of((size_t)digestmap_size(
-                                        hs_stats->onions_seen_this_period),
-                                     ONIONS_SEEN_BIN_SIZE);
+    round_uint64_to_next_multiple_of(onions_seen, ONIONS_SEEN_BIN_SIZE);
   rounded_onions_seen = MIN(rounded_onions_seen, INT64_MAX);
   obfuscated_onions_seen = add_laplace_noise((int64_t)rounded_onions_seen,
                            crypto_rand_double(), ONIONS_SEEN_DELTA_F,
                            ONIONS_SEEN_EPSILON);
 
   format_iso_time(t, now);
-  tor_asprintf(&hs_stats_string, "hidserv-stats-end %s (%d s)\n"
-               "hidserv-rend-relayed-cells %"PRId64" delta_f=%d "
-                                           "epsilon=%.2f bin_size=%d\n"
-               "hidserv-dir-onions-seen %"PRId64" delta_f=%d "
-                                        "epsilon=%.2f bin_size=%d\n",
+  tor_asprintf(&hs_stats_string, "%s %s (%u s)\n"
+               "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n"
+               "%s %"PRId64" delta_f=%d epsilon=%.2f bin_size=%d\n",
+               is_v3 ? "hidserv-v3-stats-end" : "hidserv-stats-end",
                t, (unsigned) (now - start_of_hs_stats_interval),
-               (obfuscated_cells_seen), REND_CELLS_DELTA_F,
+               is_v3 ?
+                "hidserv-rend-v3-relayed-cells" : "hidserv-rend-relayed-cells",
+               obfuscated_cells_seen, REND_CELLS_DELTA_F,
                REND_CELLS_EPSILON, REND_CELLS_BIN_SIZE,
-               (obfuscated_onions_seen),
-               ONIONS_SEEN_DELTA_F,
+               is_v3 ? "hidserv-dir-v3-onions-seen" :"hidserv-dir-onions-seen",
+               obfuscated_onions_seen, ONIONS_SEEN_DELTA_F,
                ONIONS_SEEN_EPSILON, ONIONS_SEEN_BIN_SIZE);
 
   return hs_stats_string;
 }
 
 /** If 24 hours have passed since the beginning of the current HS
- * stats period, write buffer stats to $DATADIR/stats/hidserv-stats
+ * stats period, write buffer stats to $DATADIR/stats/hidserv-v3-stats
  * (possibly overwriting an existing file) and reset counters.  Return
  * when we would next want to write buffer stats or 0 if we never want to
- * write. */
+ * write. Function works for both v2 and v3 stats depending on <b>is_v3</b>.
+ */
 time_t
-rep_hist_hs_stats_write(time_t now)
+rep_hist_hs_stats_write(time_t now, bool is_v3)
 {
   char *str = NULL;
 
+  time_t start_of_hs_stats_interval = is_v3 ?
+    start_of_hs_v3_stats_interval : start_of_hs_v2_stats_interval;
+
   if (!start_of_hs_stats_interval) {
     return 0; /* Not initialized. */
   }
@@ -1912,15 +2401,20 @@ rep_hist_hs_stats_write(time_t now)
   }
 
   /* Generate history string. */
-  str = rep_hist_format_hs_stats(now);
+  str = rep_hist_format_hs_stats(now, is_v3);
 
   /* Reset HS history. */
-  rep_hist_reset_hs_stats(now);
+  if (is_v3) {
+    rep_hist_reset_hs_v3_stats(now);
+  } else {
+    rep_hist_reset_hs_v2_stats(now);
+  }
 
   /* Try to write to disk. */
   if (!check_or_create_data_subdir("stats")) {
-    write_to_data_subdir("stats", "hidserv-stats", str,
-                         "hidden service stats");
+    write_to_data_subdir("stats",
+                         is_v3 ? "hidserv-v3-stats" : "hidserv-stats",
+                         str, "hidden service stats");
   }
 
  done:
@@ -2134,7 +2628,8 @@ rep_hist_log_link_protocol_counts(void)
 void
 rep_hist_free_all(void)
 {
-  hs_stats_free(hs_stats);
+  hs_v2_stats_free(hs_v2_stats);
+  hs_v3_stats_free(hs_v3_stats);
   digestmap_free(history_map, free_or_history);
 
   tor_free(exit_bytes_read);
@@ -2155,3 +2650,52 @@ rep_hist_free_all(void)
   tor_assert_nonfatal(rephist_total_alloc == 0);
   tor_assert_nonfatal_once(rephist_total_num == 0);
 }
+
+/** Called just before the consensus will be replaced. Update the consensus
+ * parameters in case they changed. */
+void
+rep_hist_consensus_has_changed(const networkstatus_t *ns)
+{
+  overload_onionskin_ntor_fraction =
+    networkstatus_get_param(ns, "overload_onionskin_ntor_scale_percent",
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_DEFAULT,
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_MIN,
+                            OVERLOAD_ONIONSKIN_NTOR_PERCENT_MAX) /
+    OVERLOAD_ONIONSKIN_NTOR_PERCENT_SCALE / 100.0;
+
+  overload_onionskin_ntor_period_secs =
+    networkstatus_get_param(ns, "overload_onionskin_ntor_period_secs",
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_DEFAULT,
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MIN,
+                            OVERLOAD_ONIONSKIN_NTOR_PERIOD_SECS_MAX);
+
+  /** XXX: Unused parameters. */
+  overload_dns_timeout_fraction =
+    networkstatus_get_param(ns, "overload_dns_timeout_scale_percent",
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_DEFAULT,
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_MIN,
+                            OVERLOAD_DNS_TIMEOUT_PERCENT_MAX) /
+    OVERLOAD_DNS_TIMEOUT_PERCENT_SCALE / 100.0;
+
+  overload_dns_timeout_period_secs =
+    networkstatus_get_param(ns, "overload_dns_timeout_period_secs",
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_DEFAULT,
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MIN,
+                            OVERLOAD_DNS_TIMEOUT_PERIOD_SECS_MAX);
+}
+
+#ifdef TOR_UNIT_TESTS
+/* only exists for unit tests: get HSv2 stats object */
+const hs_v2_stats_t *
+rep_hist_get_hs_v2_stats(void)
+{
+  return hs_v2_stats;
+}
+
+/* only exists for unit tests: get HSv2 stats object */
+const hs_v3_stats_t *
+rep_hist_get_hs_v3_stats(void)
+{
+  return hs_v3_stats;
+}
+#endif /* defined(TOR_UNIT_TESTS) */