1 files changed, 257 insertions, 0 deletions
diff --git a/src/core/or/congestion_control_st.h b/src/core/or/congestion_control_st.h
new file mode 100644
index 0000000000..251ebd82e3
--- /dev/null
+++ b/src/core/or/congestion_control_st.h
@@ -0,0 +1,257 @@
+/* Copyright (c) 2019-2021, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file congestion_control_st.h
+ * \brief Structure definitions for congestion control.
+ **/
+
+#ifndef CONGESTION_CONTROL_ST_H
+#define CONGESTION_CONTROL_ST_H
+
+#include "core/or/crypt_path_st.h"
+#include "core/or/circuit_st.h"
+
+/** Signifies which sendme algorithm to use */
+typedef enum {
+  /** OG Tor fixed-sized circ and stream windows. It sucks, but it is important
+   * to make sure that the new algs can compete with the old garbage. */
+  CC_ALG_SENDME = 0,
+
+  /**
+   * Prop#324 TOR_WESTWOOD - Deliberately agressive. Westwood may not even
+   * converge to fairness in some cases because max RTT will also increase
+   * on congesgtion, which boosts the Westwood RTT congestion threshhold. So it
+   * can cause runaway queue bloat, which may or may not lead to a robot
+   * uprising... Ok that's Westworld, not Westwood. Still, we need to test
+   * Vegas and NOLA against something more agressive to ensure they do not
+   * starve in the presence of cheaters. We also need to make sure cheaters
+   * trigger the oomkiller in those cases.
+   */
+  CC_ALG_WESTWOOD = 1,
+
+  /**
+   * Prop#324 TOR_VEGAS - TCP Vegas-style BDP tracker. Because Vegas backs off
+   * whenever it detects queue delay, it can be beaten out by more agressive
+   * algs. However, in live network testing, it seems to do just fine against
+   * current SENDMEs. It outperforms Westwood and does not stall. */
+  CC_ALG_VEGAS = 2,
+
+  /**
+   * Prop#324: TOR_NOLA - NOLA looks the BDP right in the eye and uses it
+   * immediately as CWND. No slow start, no other congestion signals, no delay,
+   * no bullshit. Like TOR_VEGAS, it also uses agressive BDP estimates, to
+   * avoid out-competition. It seems a bit better throughput than Vegas,
+   * but its agressive BDP and rapid updates may lead to more queue latency. */
+  CC_ALG_NOLA = 3,
+} cc_alg_t;
+
+/* Total number of CC algs in cc_alg_t enum */
+#define NUM_CC_ALGS  (CC_ALG_NOLA+1)
+
+/** Signifies how we estimate circuit BDP */
+typedef enum {
+  /* CWND-based BDP will respond to changes in RTT only, and is relative
+   * to cwnd growth. So in slow-start, this will under-estimate BDP */
+  BDP_ALG_CWND_RTT = 0,
+
+  /* Sendme-based BDP will quickly measure BDP in less than
+   * a cwnd worth of data when in use. So it should be good for slow-start.
+   * But if the link goes idle, it will be vastly lower than true BDP. Thus,
+   * this estimate gets reset when the cwnd is not fully utilized. */
+  BDP_ALG_SENDME_RATE = 1,
+
+  /* Inflight BDP is similar to the cwnd estimator, except it uses
+   * packets inflight minus local circuit queues instead of current cwnd.
+   * Because it is strictly less than or equal to the cwnd, it will cause
+   * the cwnd to drift downward. It is only used if the local OR connection
+   * is blocked. */
+  BDP_ALG_INFLIGHT_RTT = 2,
+
+  /* The Piecewise BDP estimator uses the CWND estimator before there
+   * are sufficient SENDMEs to calculate the SENDME estimator. At that
+   * point, it uses the SENDME estimator, unless the local OR connection
+   * becomes blocked. In that case, it switches to the inflight estimator. */
+  BDP_ALG_PIECEWISE = 3,
+
+} bdp_alg_t;
+
+/** Total number of BDP algs in bdp_alg_t enum */
+#define NUM_BDP_ALGS (BDP_ALG_PIECEWISE+1)
+
+/** Westwood algorithm parameters */
+struct westwood_params_t {
+    /** Cwnd backoff multiplier upon congestion (as percent) */
+    uint8_t cwnd_backoff_m;
+    /** Max RTT backoff multiplier upon congestion (as percent) */
+    uint8_t rtt_backoff_m;
+
+    /** Threshold between min and max RTT, to signal congestion (percent) */
+    uint8_t rtt_thresh;
+
+    /**
+     * If true, use minimum of BDP and backoff multiplication in backoff.
+     * If false, use maximum of BDP and backoff multiplication in backoff. */
+    bool min_backoff;
+};
+
+/** Vegas algorithm parameters. */
+struct vegas_params_t {
+    /** The queue use allowed before we exit slow start */
+    uint16_t gamma;
+    /** The queue use below which we increment cwnd */
+    uint16_t alpha;
+    /** The queue use above which we decrement cwnd */
+    uint16_t beta;
+    /** Weighted average (percent) between cwnd estimator and
+     * piecewise estimator. */
+    uint8_t bdp_mix_pct;
+};
+
+/** NOLA consensus params */
+struct nola_params_t {
+    /** How many cells to add to BDP estimate to obtain cwnd */
+    uint16_t bdp_overshoot;
+};
+
+/** Fields common to all congestion control algorithms */
+typedef struct congestion_control_t {
+  /**
+   * Smartlist of uint64_t monotime usec timestamps of when we sent a data
+   * cell that is pending a sendme. FIFO queue that is managed similar to
+   * sendme_last_digests. */
+  smartlist_t *sendme_pending_timestamps;
+
+  /**
+   * Smartlist of uint64_t monotime timestamp of when sendme's arrived.
+   * FIFO queue that is managed similar to sendme_last_digests.
+   * Used to estimate circuitbandwidth and BDP. */
+  smartlist_t *sendme_arrival_timestamps;
+
+  /** RTT time data for congestion control. */
+  uint64_t ewma_rtt_usec;
+  uint64_t min_rtt_usec;
+  uint64_t max_rtt_usec;
+
+  /* BDP estimates by algorithm */
+  uint64_t bdp[NUM_BDP_ALGS];
+
+  /** Congestion window */
+  uint64_t cwnd;
+
+  /** Number of cells in-flight (sent but awaiting SENDME ack). */
+  uint64_t inflight;
+
+  /**
+   * For steady-state: the number of sendme acks until we will acknowledge
+   * a congestion event again. It starts out as the number of sendme acks
+   * in a congestion windowm and is decremented each ack. When this reaches
+   * 0, it means we should examine our congestion algorithm conditions.
+   * In this way, we only react to one congestion event per congestion window.
+   *
+   * It is also reset to 0 immediately whenever the circuit's orconn is
+   * blocked, and when a previously blocked orconn is unblocked.
+   */
+  uint64_t next_cc_event;
+
+  /** Are we in slow start? */
+  bool in_slow_start;
+
+  /** Is the local channel blocked on us? That's a congestion signal */
+  bool blocked_chan;
+
+  /* The following parameters are cached from consensus values upon
+   * circuit setup. */
+
+  /** Percent of cwnd to increment by during slow start */
+  uint16_t cwnd_inc_pct_ss;
+
+  /** Number of cells to increment cwnd by during steady state */
+  uint16_t cwnd_inc;
+
+  /** Minimum congestion window (must be at least sendme_inc) */
+  uint16_t cwnd_min;
+
+  /**
+   * Number of times per congestion window to update based on congestion
+   * signals */
+  uint8_t cwnd_inc_rate;
+
+  /**
+   * Number of cwnd worth of sendme acks to smooth RTT and BDP with,
+   * using N_EWMA */
+  uint8_t ewma_cwnd_cnt;
+
+  /**
+   * Minimum number of sendmes before we begin BDP estimates
+   */
+  uint8_t bwe_sendme_min;
+
+  /**
+   * Number of cells to ack with every sendme. Taken from consensus parameter
+   * and negotiation during circuit setup. */
+  uint8_t sendme_inc;
+
+  /** Which congestion control algorithm to use. Taken from
+   * consensus parameter and negotiation during circuit setup. */
+  cc_alg_t cc_alg;
+
+  /** Which algorithm to estimate circuit bandwidth with. Taken from
+   * consensus parameter during circuit setup. */
+  bdp_alg_t bdp_alg;
+
+  /** Algorithm-specific parameters. The specific struct that is used
+   * depends upon the algoritghm selected by the cc_alg parameter.
+   * These should not be accessed anywhere other than the algorithm-specific
+   * files. */
+  union {
+    struct westwood_params_t westwood_params;
+    struct vegas_params_t vegas_params;
+    struct nola_params_t nola_params;
+  };
+} congestion_control_t;
+
+/**
+ * Returns the number of sendme acks we will recieve before we update cwnd.
+ *
+ * Congestion control literature recommends only one update of cwnd per
+ * cwnd worth of acks. However, we can also tune this to be more frequent
+ * by increasing the 'cc_cwnd_inc_rate' consensus parameter.
+ *
+ * If this returns 0 due to high cwnd_inc_rate, the calling code will
+ * update every sendme ack.
+ */
+static inline uint64_t CWND_UPDATE_RATE(const congestion_control_t *cc)
+{
+  /* We add cwnd_inc_rate*sendme_inc/2 to round to nearest integer number
+   * of acks */
+  return ((cc->cwnd + cc->cwnd_inc_rate*cc->sendme_inc/2)
+           / (cc->cwnd_inc_rate*cc->sendme_inc));
+}
+
+/**
+ * Returns the amount to increment the congestion window each update,
+ * during slow start.
+ *
+ * Congestion control literature recommends either doubling the cwnd
+ * every cwnd during slow start, or some similar exponential growth
+ * (such as 50% more every cwnd, for Vegas).
+ *
+ * This is controlled by a consensus parameter 'cwnd_inc_pct_ss', which
+ * allows us to specify the percent of the current consensus window
+ * to update by.
+ */
+static inline uint64_t CWND_INC_SS(const congestion_control_t *cc)
+{
+  return (cc->cwnd_inc_pct_ss*cc->cwnd/100);
+}
+
+/**
+ * Returns the amount to increment (and for Vegas, also decrement) the
+ * congestion window by, every update period.
+ *
+ * This is controlled by the cc_cwnd_inc consensus parameter.
+ */
+#define CWND_INC(cc)           ((cc)->cwnd_inc)
+
+#endif /* !defined(CONGESTION_CONTROL_ST_H) */