aboutsummaryrefslogtreecommitdiff
path: root/src/core/or/congestion_control_st.h
blob: 251ebd82e35ebe787aa990c5e9641fb25d8bc4e4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/* Copyright (c) 2019-2021, The Tor Project, Inc. */
/* See LICENSE for licensing information */

/**
 * \file congestion_control_st.h
 * \brief Structure definitions for congestion control.
 **/

#ifndef CONGESTION_CONTROL_ST_H
#define CONGESTION_CONTROL_ST_H

#include "core/or/crypt_path_st.h"
#include "core/or/circuit_st.h"

/** Signifies which sendme algorithm to use */
typedef enum {
  /** OG Tor fixed-sized circ and stream windows. It sucks, but it is important
   * to make sure that the new algs can compete with the old garbage. */
  CC_ALG_SENDME = 0,

  /**
   * Prop#324 TOR_WESTWOOD - Deliberately aggressive. Westwood may not even
   * converge to fairness in some cases because max RTT will also increase
   * on congestion, which boosts the Westwood RTT congestion threshold. So it
   * can cause runaway queue bloat, which may or may not lead to a robot
   * uprising... Ok that's Westworld, not Westwood. Still, we need to test
   * Vegas and NOLA against something more aggressive to ensure they do not
   * starve in the presence of cheaters. We also need to make sure cheaters
   * trigger the oomkiller in those cases.
   */
  CC_ALG_WESTWOOD = 1,

  /**
   * Prop#324 TOR_VEGAS - TCP Vegas-style BDP tracker. Because Vegas backs off
   * whenever it detects queue delay, it can be beaten out by more aggressive
   * algs. However, in live network testing, it seems to do just fine against
   * current SENDMEs. It outperforms Westwood and does not stall. */
  CC_ALG_VEGAS = 2,

  /**
   * Prop#324: TOR_NOLA - NOLA looks the BDP right in the eye and uses it
   * immediately as CWND. No slow start, no other congestion signals, no delay,
   * no bullshit. Like TOR_VEGAS, it also uses aggressive BDP estimates, to
   * avoid out-competition. It seems a bit better throughput than Vegas,
   * but its aggressive BDP and rapid updates may lead to more queue latency. */
  CC_ALG_NOLA = 3,
} cc_alg_t;

/* Total number of CC algs in cc_alg_t enum */
#define NUM_CC_ALGS  (CC_ALG_NOLA+1)

/** Signifies how we estimate circuit BDP */
typedef enum {
  /** CWND-based BDP will respond to changes in RTT only, and is relative
   * to cwnd growth. So in slow-start, this will under-estimate BDP. */
  BDP_ALG_CWND_RTT = 0,

  /** Sendme-based BDP will quickly measure BDP in less than
   * a cwnd worth of data when in use. So it should be good for slow-start.
   * But if the link goes idle, it will be vastly lower than true BDP. Thus,
   * this estimate gets reset when the cwnd is not fully utilized. */
  BDP_ALG_SENDME_RATE = 1,

  /** Inflight BDP is similar to the cwnd estimator, except it uses
   * packets inflight minus local circuit queues instead of current cwnd.
   * Because it is strictly less than or equal to the cwnd, it will cause
   * the cwnd to drift downward. It is only used if the local OR connection
   * is blocked. */
  BDP_ALG_INFLIGHT_RTT = 2,

  /** The Piecewise BDP estimator uses the CWND estimator before there
   * are sufficient SENDMEs to calculate the SENDME estimator. At that
   * point, it uses the SENDME estimator, unless the local OR connection
   * becomes blocked. In that case, it switches to the inflight estimator. */
  BDP_ALG_PIECEWISE = 3,

} bdp_alg_t;

/** Total number of BDP algs in bdp_alg_t enum */
#define NUM_BDP_ALGS (BDP_ALG_PIECEWISE+1)

/** Westwood algorithm parameters.
 *
 * Presumably cached from consensus values at circuit setup, like the other
 * algorithm parameter structs here -- TODO confirm against the setup code. */
struct westwood_params_t {
    /** Cwnd backoff multiplier upon congestion (as percent) */
    uint8_t cwnd_backoff_m;
    /** Max RTT backoff multiplier upon congestion (as percent) */
    uint8_t rtt_backoff_m;

    /** Threshold between min and max RTT, to signal congestion (percent) */
    uint8_t rtt_thresh;

    /**
     * If true, use minimum of BDP and backoff multiplication in backoff.
     * If false, use maximum of BDP and backoff multiplication in backoff. */
    bool min_backoff;
};

/** Vegas algorithm parameters. */
struct vegas_params_t {
    /** The queue use allowed before we exit slow start
     * (presumably measured in cells, like cwnd -- TODO confirm) */
    uint16_t gamma;
    /** The queue use below which we increment cwnd */
    uint16_t alpha;
    /** The queue use above which we decrement cwnd */
    uint16_t beta;
    /** Weighted average (percent) between cwnd estimator and
     * piecewise estimator. */
    uint8_t bdp_mix_pct;
};

/** NOLA consensus params */
struct nola_params_t {
    /** How many cells to add to BDP estimate to obtain cwnd */
    uint16_t bdp_overshoot;
};

/** Fields common to all congestion control algorithms */
typedef struct congestion_control_t {
  /**
   * Smartlist of uint64_t monotime usec timestamps of when we sent a data
   * cell that is pending a sendme. FIFO queue that is managed similar to
   * sendme_last_digests. */
  smartlist_t *sendme_pending_timestamps;

  /**
   * Smartlist of uint64_t monotime timestamp of when sendme's arrived.
   * FIFO queue that is managed similar to sendme_last_digests.
   * Used to estimate circuit bandwidth and BDP. */
  smartlist_t *sendme_arrival_timestamps;

  /** RTT time data for congestion control, in microseconds. */
  uint64_t ewma_rtt_usec;
  uint64_t min_rtt_usec;
  uint64_t max_rtt_usec;

  /** BDP estimates by algorithm, indexed by bdp_alg_t value. */
  uint64_t bdp[NUM_BDP_ALGS];

  /** Congestion window */
  uint64_t cwnd;

  /** Number of cells in-flight (sent but awaiting SENDME ack). */
  uint64_t inflight;

  /**
   * For steady-state: the number of sendme acks until we will acknowledge
   * a congestion event again. It starts out as the number of sendme acks
   * in a congestion window and is decremented each ack. When this reaches
   * 0, it means we should examine our congestion algorithm conditions.
   * In this way, we only react to one congestion event per congestion window.
   *
   * It is also reset to 0 immediately whenever the circuit's orconn is
   * blocked, and when a previously blocked orconn is unblocked.
   */
  uint64_t next_cc_event;

  /** Are we in slow start? */
  bool in_slow_start;

  /** Is the local channel blocked on us? That's a congestion signal */
  bool blocked_chan;

  /* The following parameters are cached from consensus values upon
   * circuit setup. */

  /** Percent of cwnd to increment by during slow start */
  uint16_t cwnd_inc_pct_ss;

  /** Number of cells to increment cwnd by during steady state */
  uint16_t cwnd_inc;

  /** Minimum congestion window (must be at least sendme_inc) */
  uint16_t cwnd_min;

  /**
   * Number of times per congestion window to update based on congestion
   * signals */
  uint8_t cwnd_inc_rate;

  /**
   * Number of cwnd worth of sendme acks to smooth RTT and BDP with,
   * using N_EWMA */
  uint8_t ewma_cwnd_cnt;

  /**
   * Minimum number of sendmes before we begin BDP estimates
   */
  uint8_t bwe_sendme_min;

  /**
   * Number of cells to ack with every sendme. Taken from consensus parameter
   * and negotiation during circuit setup. */
  uint8_t sendme_inc;

  /** Which congestion control algorithm to use. Taken from
   * consensus parameter and negotiation during circuit setup. */
  cc_alg_t cc_alg;

  /** Which algorithm to estimate circuit bandwidth with. Taken from
   * consensus parameter during circuit setup. */
  bdp_alg_t bdp_alg;

  /** Algorithm-specific parameters. The specific struct that is used
   * depends upon the algorithm selected by the cc_alg parameter.
   * These should not be accessed anywhere other than the algorithm-specific
   * files. */
  union {
    struct westwood_params_t westwood_params;
    struct vegas_params_t vegas_params;
    struct nola_params_t nola_params;
  };
} congestion_control_t;

/**
 * Returns the number of sendme acks we will receive before we update cwnd.
 *
 * Congestion control literature recommends only one update of cwnd per
 * cwnd worth of acks. However, we can also tune this to be more frequent
 * by increasing the 'cc_cwnd_inc_rate' consensus parameter.
 *
 * If this returns 0 due to high cwnd_inc_rate, the calling code will
 * update every sendme ack.
 */
static inline uint64_t CWND_UPDATE_RATE(const congestion_control_t *cc)
{
  /* Acks per update period. Both factors are cached from consensus
   * parameters and should never be zero, but guard the division anyway:
   * a zero divisor maps to a return of 0, which callers already treat
   * as "update on every sendme ack" (see comment above). */
  const uint64_t acks_per_update =
    (uint64_t)cc->cwnd_inc_rate * cc->sendme_inc;

  if (acks_per_update == 0)
    return 0;

  /* We add acks_per_update/2 to round to nearest integer number of acks */
  return (cc->cwnd + acks_per_update/2) / acks_per_update;
}

/**
 * Returns the number of cells by which to grow the congestion window on
 * each update while in slow start.
 *
 * Congestion control literature recommends either doubling the cwnd
 * every cwnd during slow start, or some similar exponential growth
 * (such as 50% more every cwnd, for Vegas).
 *
 * The growth is expressed as a percentage of the current window via the
 * 'cwnd_inc_pct_ss' consensus parameter cached on the circuit, so the
 * increment scales with cwnd itself.
 */
static inline uint64_t CWND_INC_SS(const congestion_control_t *cc)
{
  const uint64_t pct = cc->cwnd_inc_pct_ss;

  return pct * cc->cwnd / 100;
}

/**
 * Returns the amount to increment (and for Vegas, also decrement) the
 * congestion window by, every update period.
 *
 * This is controlled by the cc_cwnd_inc consensus parameter.
 */
#define CWND_INC(cc)           ((cc)->cwnd_inc)

#endif /* !defined(CONGESTION_CONTROL_ST_H) */