diff options
Diffstat (limited to 'src/core')
52 files changed, 4597 insertions, 323 deletions
diff --git a/src/core/crypto/include.am b/src/core/crypto/include.am index 28b7e22905..2d53b3cb0b 100644 --- a/src/core/crypto/include.am +++ b/src/core/crypto/include.am @@ -5,6 +5,7 @@ LIBTOR_APP_A_SOURCES += \ src/core/crypto/onion_crypto.c \ src/core/crypto/onion_fast.c \ src/core/crypto/onion_ntor.c \ + src/core/crypto/onion_ntor_v3.c \ src/core/crypto/onion_tap.c \ src/core/crypto/relay_crypto.c @@ -14,5 +15,6 @@ noinst_HEADERS += \ src/core/crypto/onion_crypto.h \ src/core/crypto/onion_fast.h \ src/core/crypto/onion_ntor.h \ + src/core/crypto/onion_ntor_v3.h \ src/core/crypto/onion_tap.h \ src/core/crypto/relay_crypto.h diff --git a/src/core/crypto/onion_ntor_v3.c b/src/core/crypto/onion_ntor_v3.c new file mode 100644 index 0000000000..491c69cf8d --- /dev/null +++ b/src/core/crypto/onion_ntor_v3.c @@ -0,0 +1,760 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * @file onion_ntor_v3.c + * @brief Implements the version 3 ntor handshake as first specified in + * proposal 332. + * + * The v3 ntor handshake differs from the earlier versions (ntor and hs-ntor) + * primarily in that it allows the client to send an authenticated encrypted + * message as part of its onion skin, and allows the relay to send and + * encrypted authenticated reply as part of its response. + * + * It also takes a "verification string" -- the handshake cannot succeed + * unless both parties use the same value for their verification stream. 
+ **/ + +#define ONION_NTOR_V3_PRIVATE + +#include "orconfig.h" +#include "core/crypto/onion_ntor_v3.h" + +#include "lib/arch/bytes.h" +#include "lib/crypt_ops/crypto_digest.h" +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/crypt_ops/crypto_util.h" +#include "lib/ctime/di_ops.h" +#include "lib/log/util_bug.h" + +#include <string.h> + +/* Parameters used to keep the outputs of this handshake from colliding with + * others. These are defined in the specification. */ +#define PROTOID "ntor3-curve25519-sha3_256-1" +#define TWEAK(A) (PROTOID ":" A) + +#define T_MSGKDF TWEAK("kdf_phase1") +#define T_MSGMAC TWEAK("msg_mac") +#define T_KEY_SEED TWEAK("key_seed") +#define T_VERIFY TWEAK("verify") +#define T_FINAL TWEAK("kdf_final") +#define T_AUTH TWEAK("auth_final") + +/** + * Add @a len bytes of @a data as input to the provided @a xof. + * + * (This is provided just for abbreviation). + **/ +#define xof_add(xof, data, len) crypto_xof_add_bytes((xof), (data), (len)) +/** + * Add @a len bytes of @a data as input to the provided @a xof, + * prefixed with an encoding of the length. + * + * This is equivalent to ENCAP(data) in the spec. + **/ +static void +xof_add_encap(crypto_xof_t *xof, const uint8_t *data, size_t len) +{ + uint64_t len64 = tor_htonll(len); + xof_add(xof, (uint8_t *)(&len64), 8); + xof_add(xof, data, len); +} +/** + * Add an encapsulated tweak to the provided xof. + **/ +#define xof_add_tweak(d, s) xof_add_encap((d), (const uint8_t *)(s), strlen(s)) + +/** + * Add @a len bytes of @a data to the provided @a digest. + * + * This is provided as an abbreviation, and to get the types right. + **/ +static void +d_add(crypto_digest_t *digest, const uint8_t *data, size_t len) +{ + crypto_digest_add_bytes(digest, (const char *)data, len); +} +/** + * Add @a len bytes of @a data to the provided @a digest, prefixed + * with the encoded length. + * + * This is equivalent to ENCAP(data) from the spec. 
+ **/ +static void +d_add_encap(crypto_digest_t *digest, const uint8_t *data, size_t len) +{ + uint64_t len64 = tor_htonll(len); + d_add(digest, (const uint8_t *)(&len64), 8); + d_add(digest, data, len); +} +/** + * Add an encapsulated tweak to the provided digest. + **/ +#define d_add_tweak(d, s) d_add_encap((d), (const uint8_t *)(s), strlen(s)) + +/** + * Helper: copy @a len bytes of @a data onto *@a ptr, and advance @a ptr + * forward by @a len bytes. + * + * Asserts that @a ptr will not be advanced beyond @a endptr. + **/ +static void +push(uint8_t **ptr, const uint8_t *endptr, const uint8_t *data, size_t len) +{ + size_t remaining = endptr - *ptr; + tor_assert(len <= remaining); + memcpy(*ptr, data, len); + *ptr += len; +} + +/** + * Helper: Drop storage held by @a state, after wiping it. + **/ +void +ntor3_handshake_state_free_(ntor3_handshake_state_t *state) +{ + if (!state) + return; + + memwipe(state, 0, sizeof(*state)); + tor_free(state); +} + +/** + * Perform a client-side v3 ntor handshake with a given relay. + * + * As inputs this function takes the relay's Ed25519 identity (@a relay_id), + * the relay's current ntor onion key (@a relay_key), a verification string + * (@a verification_len bytes at @a verification), and a message to send + * as part of the handshake (@a message_len bytes at @a message). + * + * The message will be encrypted and authenticated to the relay, but will not + * receive the same forward secrecy as the rest of the handshake. We should + * not put any super-confidential data in it. + * + * The handshake will only succeed if the relay uses the same verification + * string as we are using. + * + * As outputs, this function returns 0 on success and -1 on failure. On + * success, it sets @a onion_skin_out and @a onion_skin_len_out to a newly + * allocated handshake message that the client can send as part of its CREATE2 + * or EXTEND2 cell. 
It also sets @a handshake_state_out to a newly
+ * allocated handshake state object; the client needs to use this object to
+ * process the relay's eventual reply.
+ **/
+int
+onion_skin_ntor3_create(const ed25519_public_key_t *relay_id,
+                        const curve25519_public_key_t *relay_key,
+                        const uint8_t *verification,
+                        const size_t verification_len,
+                        const uint8_t *message,
+                        const size_t message_len,
+                        ntor3_handshake_state_t **handshake_state_out,
+                        uint8_t **onion_skin_out,
+                        size_t *onion_skin_len_out)
+{
+  curve25519_keypair_t client_keypair;
+  if (curve25519_keypair_generate(&client_keypair, 0) < 0) {
+    return -1;
+  }
+  int r = onion_skin_ntor3_create_nokeygen(
+                                   &client_keypair,
+                                   relay_id,
+                                   relay_key,
+                                   verification,
+                                   verification_len,
+                                   message,
+                                   message_len,
+                                   handshake_state_out,
+                                   onion_skin_out,
+                                   onion_skin_len_out);
+  memwipe(&client_keypair, 0, sizeof(client_keypair));
+  return r;
+}
+
+/**
+ * Like onion_skin_ntor3_create, but do not generate a new ephemeral keypair.
+ * Instead, take the ephemeral keypair (x,X) from @a client_keypair.
+ *
+ * (Having a separate function for this lets us test the code for correct
+ * behavior.)
+ **/
+STATIC int
+onion_skin_ntor3_create_nokeygen(
+                   const curve25519_keypair_t *client_keypair,
+                   const ed25519_public_key_t *relay_id,
+                   const curve25519_public_key_t *relay_key,
+                   const uint8_t *verification,
+                   const size_t verification_len,
+                   const uint8_t *message,
+                   const size_t message_len,
+                   ntor3_handshake_state_t **handshake_state_out,
+                   uint8_t **onion_skin_out,
+                   size_t *onion_skin_len_out)
+{
+  *handshake_state_out = NULL;
+  *onion_skin_out = NULL;
+  *onion_skin_len_out = 0;
+
+  // Set up the handshake state object.
+ *handshake_state_out = tor_malloc_zero(sizeof(ntor3_handshake_state_t)); + memcpy(&(*handshake_state_out)->client_keypair, client_keypair, + sizeof(*client_keypair)); + memcpy(&(*handshake_state_out)->relay_id, relay_id, sizeof(*relay_id)); + memcpy(&(*handshake_state_out)->relay_key, relay_key, sizeof(*relay_key)); + + // Perform the first DH handshake. + curve25519_handshake((*handshake_state_out)->bx, + &client_keypair->seckey, relay_key); + if (safe_mem_is_zero((*handshake_state_out)->bx, CURVE25519_OUTPUT_LEN)) { + // Okay to return early here, since our behavior here doesn't + // cause a visible timing sidechannel. + return -1; + } + + // Compute phase1_keys. + uint8_t enc_key[CIPHER256_KEY_LEN]; + uint8_t mac_key[DIGEST256_LEN]; + { + crypto_xof_t *xof = crypto_xof_new(); + // secret_input_phase1 = Bx | ID | X | B | PROTOID | ENCAP(VER) + xof_add_tweak(xof, T_MSGKDF); + xof_add(xof, (*handshake_state_out)->bx, CURVE25519_OUTPUT_LEN); + xof_add(xof, relay_id->pubkey, ED25519_PUBKEY_LEN); + xof_add(xof, client_keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN); + xof_add(xof, relay_key->public_key, CURVE25519_PUBKEY_LEN); + xof_add(xof, (const uint8_t *)PROTOID, strlen(PROTOID)); + xof_add_encap(xof, verification, verification_len); + crypto_xof_squeeze_bytes(xof, enc_key, sizeof(enc_key)); + crypto_xof_squeeze_bytes(xof, mac_key, sizeof(mac_key)); + crypto_xof_free(xof); + } + + // Compute encrypted message. + uint8_t *encrypted_message = tor_memdup(message, message_len); + { + crypto_cipher_t *c = + crypto_cipher_new_with_bits((const char *)enc_key, 256); + crypto_cipher_crypt_inplace(c, (char *)encrypted_message, message_len); + crypto_cipher_free(c); + } + + // Compute the MAC value. 
+ { + crypto_digest_t *m = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(m, T_MSGMAC); + d_add_encap(m, mac_key, sizeof(mac_key)); + d_add(m, relay_id->pubkey, ED25519_PUBKEY_LEN); + d_add(m, relay_key->public_key, CURVE25519_PUBKEY_LEN); + d_add(m, client_keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN); + d_add(m, encrypted_message, message_len); + crypto_digest_get_digest(m, + (char *)(*handshake_state_out)->msg_mac, + DIGEST256_LEN); + crypto_digest_free(m); + } + + // Build the onionskin. + *onion_skin_len_out = (ED25519_PUBKEY_LEN + CURVE25519_PUBKEY_LEN*2 + + DIGEST256_LEN + message_len); + *onion_skin_out = tor_malloc(*onion_skin_len_out); + { + uint8_t *ptr = *onion_skin_out, *end = ptr + *onion_skin_len_out; + + push(&ptr, end, relay_id->pubkey, ED25519_PUBKEY_LEN); + push(&ptr, end, relay_key->public_key, CURVE25519_PUBKEY_LEN); + push(&ptr, end, client_keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN); + push(&ptr, end, encrypted_message, message_len); + push(&ptr, end, (*handshake_state_out)->msg_mac, DIGEST256_LEN); + tor_assert(ptr == end); + } + + memwipe(&enc_key, 0, sizeof(enc_key)); + memwipe(&mac_key, 0, sizeof(mac_key)); + memwipe(encrypted_message, 0, message_len); + tor_free(encrypted_message); + + return 0; +} + +/** + * Complete a client-side v3 ntor handshake. + * + * Takes a @a handshake_state returned earlier by `onion_skin_ntor3_create()`, + * and the relay's reply to that handshake (@a reply_len bytes at @a + * handshake_reply). Also takes a verification string (@a verification_len + * bytes at @a verification). + * + * Returns 0 on success and -1 on failure. On success, generates @a key_len + * bytes of key material into the provided @a keys_out buffer, and sets @a + * message_out to the message that the relay sent in reply to our message (and + * sets @a message_out_len to that message's length). 
+ **/ +int +onion_ntor3_client_handshake(const ntor3_handshake_state_t *handshake_state, + const uint8_t *handshake_reply, + size_t reply_len, + const uint8_t *verification, + size_t verification_len, + uint8_t *keys_out, + size_t keys_out_len, + uint8_t **message_out, + size_t *message_len_out) +{ + *message_out = NULL; + *message_len_out = 0; + + int problems = 0; + + // Parse the relay's message. + curve25519_public_key_t relay_Y; + uint8_t relay_auth[DIGEST256_LEN]; + size_t encrypted_msg_len; + const uint8_t *encrypted_msg; + { + if (reply_len < CURVE25519_PUBKEY_LEN + DIGEST256_LEN) { + // Okay to return early here, since the message is completely + // ill-formed, so we can't leak anything. + ++problems; + goto done; + } + encrypted_msg_len = reply_len - (CURVE25519_PUBKEY_LEN + DIGEST256_LEN); + + memcpy(&relay_Y.public_key, handshake_reply, CURVE25519_PUBKEY_LEN); + handshake_reply += CURVE25519_PUBKEY_LEN; + memcpy(&relay_auth, handshake_reply, DIGEST256_LEN); + handshake_reply += DIGEST256_LEN; + encrypted_msg = handshake_reply; + } + + // Finish the second diffie hellman handshake. + uint8_t yx[CURVE25519_OUTPUT_LEN]; + curve25519_handshake(yx, &handshake_state->client_keypair.seckey, &relay_Y); + problems |= safe_mem_is_zero(yx, sizeof(yx)); + + // Compute two tweaked hashes of secret_input. 
+ uint8_t key_seed[DIGEST256_LEN], verify[DIGEST256_LEN]; + { + crypto_digest_t *ks = crypto_digest256_new(DIGEST_SHA3_256); + crypto_digest_t *v = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(ks, T_KEY_SEED); + d_add_tweak(v, T_VERIFY); +#define ADD2(s,len) STMT_BEGIN { \ + d_add(ks, (s),(len)); d_add(v, (s), (len)); \ + } STMT_END +#define ADD2_ENCAP(s,len) STMT_BEGIN { \ + d_add_encap(ks, (s),(len)); d_add_encap(v, (s), (len)); \ + } STMT_END + + ADD2(yx, sizeof(yx)); + ADD2(handshake_state->bx, sizeof(handshake_state->bx)); + ADD2(handshake_state->relay_id.pubkey, ED25519_PUBKEY_LEN); + ADD2(handshake_state->relay_key.public_key, CURVE25519_PUBKEY_LEN); + ADD2(handshake_state->client_keypair.pubkey.public_key, + CURVE25519_PUBKEY_LEN); + ADD2(relay_Y.public_key, CURVE25519_PUBKEY_LEN); + ADD2((const uint8_t *)PROTOID, strlen(PROTOID)); + ADD2_ENCAP(verification, verification_len); + + crypto_digest_get_digest(ks, (char*) key_seed, DIGEST256_LEN); + crypto_digest_get_digest(v, (char*) verify, DIGEST256_LEN); + crypto_digest_free(ks); + crypto_digest_free(v); + } + + // compute expected auth value. + uint8_t auth_computed[DIGEST256_LEN]; + { + crypto_digest_t *d = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(d, T_AUTH); + d_add(d, verify, sizeof(verify)); + d_add(d, handshake_state->relay_id.pubkey, ED25519_PUBKEY_LEN); + d_add(d, handshake_state->relay_key.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, relay_Y.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, handshake_state->client_keypair.pubkey.public_key, + CURVE25519_PUBKEY_LEN); + d_add(d, handshake_state->msg_mac, DIGEST256_LEN); + d_add_encap(d, encrypted_msg, encrypted_msg_len); + d_add(d, (const uint8_t*)PROTOID, strlen(PROTOID)); + d_add(d, (const uint8_t*)"Server", strlen("Server")); + crypto_digest_get_digest(d, (char *)auth_computed, DIGEST256_LEN); + crypto_digest_free(d); + } + + // Check authentication value. 
+ problems |= tor_memneq(auth_computed, relay_auth, DIGEST256_LEN); + + // Compute keystream, decrypt message, and return. + *message_out = tor_malloc(encrypted_msg_len); + *message_len_out = encrypted_msg_len; + uint8_t enc_key[CIPHER256_KEY_LEN]; + { + crypto_xof_t *xof = crypto_xof_new(); + xof_add_tweak(xof, T_FINAL); + xof_add(xof, key_seed, sizeof(key_seed)); + crypto_xof_squeeze_bytes(xof, enc_key, sizeof(enc_key)); + crypto_xof_squeeze_bytes(xof, (uint8_t *)keys_out, keys_out_len); + crypto_xof_free(xof); + + crypto_cipher_t *c = + crypto_cipher_new_with_bits((const char *)enc_key, 256); + crypto_cipher_decrypt(c, (char *)*message_out, + (const char *)encrypted_msg, encrypted_msg_len); + crypto_cipher_free(c); + } + + done: + memwipe(&relay_Y, 0, sizeof(relay_Y)); + memwipe(&relay_auth, 0, sizeof(relay_auth)); + memwipe(&yx, 0, sizeof(yx)); + memwipe(key_seed, 0, sizeof(key_seed)); + memwipe(verify, 0, sizeof(verify)); + memwipe(enc_key, 0, sizeof(enc_key)); + if (problems) { + if (*message_out) { + memwipe(*message_out, 0, *message_len_out); + tor_free(*message_out); // Sets it to NULL. + } + *message_len_out = 0; + crypto_rand((char*)keys_out, keys_out_len); // In case bad code uses it. + return -1; + } + + return 0; +} + +/** + * Wipe a server handshake state, and release the storage it holds. + **/ +void +ntor3_server_handshake_state_free_(ntor3_server_handshake_state_t *state) +{ + if (state == NULL) + return; + + memwipe(state, 0, sizeof(ntor3_server_handshake_state_t)); + tor_free(state); +} + +/** + * As a relay, start handling a client's v3 ntor handshake. + * + * This function performs the _first half_ of the handshake, up to the point + * where the client's message is decoded. After calling it, the relay should + * decide how and whether to reply to the client's message, compose its reply, + * and call `onion_skin_ntor3_server_handshake_part2`. 
+ * + * It takes as input a map of the relay's known onion keys in @a private_keys, + * along with a fake @a junk_key to use if there is a complete mismatch. It + * takes the relay's ed25519 identity in @a my_id, along with the client's + * handshake message (@a client_handshake_len bytes in @a client_handshake), + * and a verification string (@a verification_len bytes in @a verification). + * + * Return 0 on success, and -1 on failure. On success, sets @a + * client_message_out to a newly allocated string holding the plaintext of the + * message that the client sent as part of its handshake, and @a + * client_message_out_len to its length. Also sets @a state_out to a newly + * allocated state object holding the intermediate computation for this + * handshake. + **/ +int +onion_skin_ntor3_server_handshake_part1( + const di_digest256_map_t *private_keys, + const curve25519_keypair_t *junk_key, + const ed25519_public_key_t *my_id, + const uint8_t *client_handshake, + size_t client_handshake_len, + const uint8_t *verification, + size_t verification_len, + uint8_t **client_message_out, + size_t *client_message_len_out, + ntor3_server_handshake_state_t **state_out) +{ + *client_message_out = NULL; + *client_message_len_out = 0; + *state_out = NULL; + + int problems = 0; + + // Initialize state. + (*state_out) = tor_malloc_zero(sizeof(ntor3_server_handshake_state_t)); + memcpy(&(*state_out)->my_id, my_id, sizeof(*my_id)); + + const uint8_t *wanted_id; // [ED25519_PUBKEY_LEN] + const uint8_t *wanted_key; // [CURVE25519_PUBKEY_LEN] + const uint8_t *encrypted_message; + size_t encrypted_message_len; + // Unpack the client handshake. + { + const uint8_t *ptr = client_handshake; + const uint8_t *end = ptr + client_handshake_len; + + if (client_handshake_len < + ED25519_PUBKEY_LEN + CURVE25519_PUBKEY_LEN * 2 + DIGEST256_LEN) { + // Okay to end early; the client knows this is unparseable already. 
+ ++problems; + goto done; + } + wanted_id = ptr; + ptr += ED25519_PUBKEY_LEN; + wanted_key = ptr; + ptr += CURVE25519_PUBKEY_LEN; + memcpy((*state_out)->client_key.public_key, ptr, CURVE25519_PUBKEY_LEN); + ptr += CURVE25519_PUBKEY_LEN; + size_t remaining = (end-ptr); + if (BUG(remaining < DIGEST256_LEN)) { + // Okay to end early; this is a bug. + ++problems; + goto done; + } + encrypted_message = ptr; + encrypted_message_len = remaining - DIGEST256_LEN; + ptr += encrypted_message_len; + remaining = (end-ptr); + tor_assert(remaining == DIGEST256_LEN); + memcpy((*state_out)->msg_mac, ptr, DIGEST256_LEN); + } + + // Check the identity. + problems |= tor_memneq(my_id->pubkey, wanted_id, ED25519_PUBKEY_LEN); + + // Find the correct keypair. + const curve25519_keypair_t *keypair = + dimap_search(private_keys, wanted_key, (void *)junk_key); + tor_assert(keypair); + memcpy(&(*state_out)->my_key, &keypair->pubkey, + sizeof(curve25519_public_key_t)); + + // Do the first diffie hellman handshake. + curve25519_handshake((*state_out)->xb, + &keypair->seckey, &(*state_out)->client_key); + problems |= safe_mem_is_zero((*state_out)->xb, CURVE25519_OUTPUT_LEN); + + // Derive the encryption and mac keys + uint8_t enc_key[CIPHER256_KEY_LEN], mac_key[DIGEST256_LEN]; + { + crypto_xof_t *xof = crypto_xof_new(); + xof_add_tweak(xof, T_MSGKDF); + xof_add(xof, (*state_out)->xb, CURVE25519_OUTPUT_LEN); + xof_add(xof, wanted_id, ED25519_PUBKEY_LEN); + xof_add(xof, (*state_out)->client_key.public_key, CURVE25519_PUBKEY_LEN); + xof_add(xof, keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN); + xof_add(xof, (const uint8_t *)PROTOID, strlen(PROTOID)); + xof_add_encap(xof, verification, verification_len); + crypto_xof_squeeze_bytes(xof, enc_key, sizeof(enc_key)); + crypto_xof_squeeze_bytes(xof, mac_key, sizeof(mac_key)); + crypto_xof_free(xof); + } + + // Check the MAC. 
+ uint8_t computed_mac[DIGEST256_LEN]; + { + crypto_digest_t *d = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(d, T_MSGMAC); + d_add_encap(d, mac_key, sizeof(mac_key)); + d_add(d, my_id->pubkey, ED25519_PUBKEY_LEN); + d_add(d, keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, (*state_out)->client_key.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, encrypted_message, encrypted_message_len); + crypto_digest_get_digest(d, (char *)computed_mac, DIGEST256_LEN); + crypto_digest_free(d); + } + + problems |= tor_memneq((*state_out)->msg_mac, computed_mac, DIGEST256_LEN); + + // Decrypt the message. + *client_message_out = tor_malloc(encrypted_message_len); + *client_message_len_out = encrypted_message_len; + { + crypto_cipher_t *c = + crypto_cipher_new_with_bits((const char *)enc_key, 256); + crypto_cipher_decrypt(c, (char *)*client_message_out, + (const char *)encrypted_message, + encrypted_message_len); + crypto_cipher_free(c); + } + + done: + memwipe(enc_key, 0, sizeof(enc_key)); + memwipe(mac_key, 0, sizeof(mac_key)); + memwipe(computed_mac, 0, sizeof(computed_mac)); + if (problems) { + if (*client_message_out) { + memwipe(*client_message_out, 0, *client_message_len_out); + tor_free(*client_message_out); // Sets it to NULL. + } + *client_message_len_out = 0; + ntor3_server_handshake_state_free(*state_out); + return -1; + } + + return 0; +} + +/** + * Finish the relay side of an ntor v3 handshake. + * + * The relay calls this function after it has decided to respond to the + * client's original encrypted message. This function receives the relay's + * message in @a server_message and its length in @a server_message_len, and + * completes the handshake. + * + * Returns 0 on success and -1 on failure. On success, stores the newly + * allocated handshake for the relay to send in @a handshake_out, and its + * length in @a handshake_len_out. Stores @a keys_out_len bytes of generated + * keys in the provided buffer at @a keys_out. 
+ **/ +int +onion_skin_ntor3_server_handshake_part2( + const ntor3_server_handshake_state_t *state, + const uint8_t *verification, + size_t verification_len, + const uint8_t *server_message, + size_t server_message_len, + uint8_t **handshake_out, + size_t *handshake_len_out, + uint8_t *keys_out, + size_t keys_out_len) +{ + curve25519_keypair_t relay_keypair; + if (curve25519_keypair_generate(&relay_keypair, 0) < 0) { + return -1; + } + int r = onion_skin_ntor3_server_handshake_part2_nokeygen( + &relay_keypair, + state, + verification, + verification_len, + server_message, + server_message_len, + handshake_out, + handshake_len_out, + keys_out, + keys_out_len); + memwipe(&relay_keypair, 0, sizeof(relay_keypair)); + return r; +} + +/** + * Like `onion_skin_ntor3_server_handshake_part2`, but do not generate + * an ephemeral (y,Y) keypair. + * + * Instead, this function takes that keypair as @a relay_keypair_y. + * + * (Having a separate function for this lets us test the code for correct + * behavior.) + **/ +STATIC int +onion_skin_ntor3_server_handshake_part2_nokeygen( + const curve25519_keypair_t *relay_keypair_y, + const ntor3_server_handshake_state_t *state, + const uint8_t *verification, + size_t verification_len, + const uint8_t *server_message, + size_t server_message_len, + uint8_t **handshake_out, + size_t *handshake_len_out, + uint8_t *keys_out, + size_t keys_out_len) +{ + *handshake_out = NULL; + *handshake_len_out = 0; + + int problems = 0; + + // Second diffie-hellman handshake. + uint8_t xy[CURVE25519_OUTPUT_LEN]; + curve25519_handshake(xy, &relay_keypair_y->seckey, &state->client_key); + problems |= safe_mem_is_zero(xy, sizeof(xy)); + + // Compute two tweaked hashes of secret_input. 
+ uint8_t key_seed[DIGEST256_LEN], verify[DIGEST256_LEN]; + { + crypto_digest_t *ks = crypto_digest256_new(DIGEST_SHA3_256); + crypto_digest_t *v = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(ks, T_KEY_SEED); + d_add_tweak(v, T_VERIFY); + ADD2(xy, sizeof(xy)); + ADD2(state->xb, sizeof(state->xb)); + ADD2(state->my_id.pubkey, ED25519_PUBKEY_LEN); + ADD2(state->my_key.public_key, CURVE25519_PUBKEY_LEN); + ADD2(state->client_key.public_key, CURVE25519_PUBKEY_LEN); + ADD2(relay_keypair_y->pubkey.public_key, CURVE25519_PUBKEY_LEN); + ADD2((const uint8_t *)PROTOID, strlen(PROTOID)); + ADD2_ENCAP(verification, verification_len); + crypto_digest_get_digest(ks, (char*) key_seed, DIGEST256_LEN); + crypto_digest_get_digest(v, (char*) verify, DIGEST256_LEN); + crypto_digest_free(ks); + crypto_digest_free(v); + } + + // Compute enc_key and keystream. + uint8_t enc_key[CIPHER256_KEY_LEN]; + { + crypto_xof_t *xof = crypto_xof_new(); + xof_add_tweak(xof, T_FINAL); + xof_add(xof, key_seed, sizeof(key_seed)); + crypto_xof_squeeze_bytes(xof, enc_key, sizeof(enc_key)); + crypto_xof_squeeze_bytes(xof, keys_out, keys_out_len); + crypto_xof_free(xof); + } + + // Encrypt message. + uint8_t *encrypted_message = tor_memdup(server_message, server_message_len); + { + crypto_cipher_t *c = + crypto_cipher_new_with_bits((const char *)enc_key, 256); + crypto_cipher_crypt_inplace( + c, (char *)encrypted_message, server_message_len); + crypto_cipher_free(c); + } + + // Compute AUTH digest. 
+ uint8_t auth[DIGEST256_LEN]; + { + crypto_digest_t *d = crypto_digest256_new(DIGEST_SHA3_256); + d_add_tweak(d, T_AUTH); + d_add(d, verify, sizeof(verify)); + d_add(d, state->my_id.pubkey, ED25519_PUBKEY_LEN); + d_add(d, state->my_key.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, relay_keypair_y->pubkey.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, state->client_key.public_key, CURVE25519_PUBKEY_LEN); + d_add(d, state->msg_mac, DIGEST256_LEN); + d_add_encap(d, encrypted_message, server_message_len); + d_add(d, (const uint8_t*)PROTOID, strlen(PROTOID)); + d_add(d, (const uint8_t*)"Server", strlen("Server")); + crypto_digest_get_digest(d, (char *)auth, DIGEST256_LEN); + crypto_digest_free(d); + } + + // Compose the reply. + *handshake_len_out = CURVE25519_PUBKEY_LEN + DIGEST256_LEN + + server_message_len; + *handshake_out = tor_malloc(*handshake_len_out); + uint8_t *ptr = *handshake_out, *end = ptr + *handshake_len_out; + push(&ptr, end, relay_keypair_y->pubkey.public_key, CURVE25519_PUBKEY_LEN); + push(&ptr, end, auth, sizeof(auth)); + push(&ptr, end, encrypted_message, server_message_len); + tor_assert(ptr == end); + + // Clean up and return. + memwipe(xy, 0, sizeof(xy)); + memwipe(key_seed, 0, sizeof(key_seed)); + memwipe(verify, 0, sizeof(verify)); + memwipe(enc_key, 0, sizeof(enc_key)); + memwipe(encrypted_message, 0, server_message_len); + tor_free(encrypted_message); + + if (problems) { + memwipe(*handshake_out, 0, *handshake_len_out); + tor_free(*handshake_out); // Sets it to NULL. + *handshake_len_out = 0; + crypto_rand((char*)keys_out, keys_out_len); // In case bad code uses it. + return -1; + } + return 0; +} diff --git a/src/core/crypto/onion_ntor_v3.h b/src/core/crypto/onion_ntor_v3.h new file mode 100644 index 0000000000..4449eb237d --- /dev/null +++ b/src/core/crypto/onion_ntor_v3.h @@ -0,0 +1,140 @@ +/* Copyright (c) 2001 Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. 
+ * Copyright (c) 2007-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * @file onion_ntor_v3.h + * @brief Header for core/crypto/onion_ntor_v3.c + **/ + +#ifndef TOR_CORE_CRYPTO_ONION_NTOR_V3_H +#define TOR_CORE_CRYPTO_ONION_NTOR_V3_H + +#include "lib/cc/torint.h" +#include "lib/testsupport/testsupport.h" +#include "lib/crypt_ops/crypto_cipher.h" +#include "lib/crypt_ops/crypto_curve25519.h" +#include "lib/crypt_ops/crypto_ed25519.h" +#include "lib/malloc/malloc.h" + +/** + * Client-side state held while an ntor v3 handshake is in progress. + **/ +typedef struct ntor3_handshake_state_t ntor3_handshake_state_t; + +/** + * Server-side state held while the relay is handling a client's + * encapsulated message, before replying to the v3 handshake. + **/ +typedef struct ntor3_server_handshake_state_t ntor3_server_handshake_state_t; + +void ntor3_handshake_state_free_(ntor3_handshake_state_t *st); +#define ntor3_handshake_state_free(ptr) \ + FREE_AND_NULL(ntor3_handshake_state_t, ntor3_handshake_state_free_, (ptr)) +void ntor3_server_handshake_state_free_(ntor3_server_handshake_state_t *st); +#define ntor3_server_handshake_state_free(ptr) \ + FREE_AND_NULL(ntor3_server_handshake_state_t, \ + ntor3_server_handshake_state_free_, (ptr)) + +int onion_skin_ntor3_create(const ed25519_public_key_t *relay_id, + const curve25519_public_key_t *relay_key, + const uint8_t *verification, + const size_t verification_len, + const uint8_t *message, + const size_t message_len, + ntor3_handshake_state_t **handshake_state_out, + uint8_t **onion_skin_out, + size_t *onion_skin_len_out); + +int onion_ntor3_client_handshake( + const ntor3_handshake_state_t *handshake_state, + const uint8_t *handshake_reply, + size_t reply_len, + const uint8_t *verification, + size_t verification_len, + uint8_t *keys_out, + size_t keys_out_len, + uint8_t **message_out, + size_t *message_len_out); + +struct di_digest256_map_t; +int onion_skin_ntor3_server_handshake_part1( + 
const struct di_digest256_map_t *private_keys, + const curve25519_keypair_t *junk_key, + const ed25519_public_key_t *my_id, + const uint8_t *client_handshake, + size_t client_handshake_len, + const uint8_t *verification, + size_t verification_len, + uint8_t **client_message_out, + size_t *client_message_len_out, + ntor3_server_handshake_state_t **state_out); + +int onion_skin_ntor3_server_handshake_part2( + const ntor3_server_handshake_state_t *state, + const uint8_t *verification, + size_t verification_len, + const uint8_t *server_message, + size_t server_message_len, + uint8_t **handshake_out, + size_t *handshake_len_out, + uint8_t *keys_out, + size_t keys_out_len); + +#ifdef ONION_NTOR_V3_PRIVATE +struct ntor3_handshake_state_t { + /** Ephemeral (x,X) keypair. */ + curve25519_keypair_t client_keypair; + /** Relay's ed25519 identity key (ID) */ + ed25519_public_key_t relay_id; + /** Relay's public key (B) */ + curve25519_public_key_t relay_key; + /** Shared secret (Bx). */ + uint8_t bx[CURVE25519_OUTPUT_LEN]; + /** MAC of the client's encrypted message data (MAC) */ + uint8_t msg_mac[DIGEST256_LEN]; +}; + +struct ntor3_server_handshake_state_t { + /** Relay's ed25519 identity key (ID) */ + ed25519_public_key_t my_id; + /** Relay's public key (B) */ + curve25519_public_key_t my_key; + /** Client's public ephemeral key (X). 
*/ + curve25519_public_key_t client_key; + + /** Shared secret (Xb) */ + uint8_t xb[CURVE25519_OUTPUT_LEN]; + /** MAC of the client's encrypted message data */ + uint8_t msg_mac[DIGEST256_LEN]; +}; + +STATIC int onion_skin_ntor3_create_nokeygen( + const curve25519_keypair_t *client_keypair, + const ed25519_public_key_t *relay_id, + const curve25519_public_key_t *relay_key, + const uint8_t *verification, + const size_t verification_len, + const uint8_t *message, + const size_t message_len, + ntor3_handshake_state_t **handshake_state_out, + uint8_t **onion_skin_out, + size_t *onion_skin_len_out); + +STATIC int onion_skin_ntor3_server_handshake_part2_nokeygen( + const curve25519_keypair_t *relay_keypair_y, + const ntor3_server_handshake_state_t *state, + const uint8_t *verification, + size_t verification_len, + const uint8_t *server_message, + size_t server_message_len, + uint8_t **handshake_out, + size_t *handshake_len_out, + uint8_t *keys_out, + size_t keys_out_len); + +#endif + +#endif /* !defined(TOR_CORE_CRYPTO_ONION_NTOR_V3_H) */ diff --git a/src/core/mainloop/connection.c b/src/core/mainloop/connection.c index b17d7bf2bd..9271a70914 100644 --- a/src/core/mainloop/connection.c +++ b/src/core/mainloop/connection.c @@ -117,6 +117,7 @@ #include "lib/cc/ctassert.h" #include "lib/sandbox/sandbox.h" #include "lib/net/buffers_net.h" +#include "lib/net/address.h" #include "lib/tls/tortls.h" #include "lib/evloop/compat_libevent.h" #include "lib/compress/compress.h" @@ -146,6 +147,8 @@ #include "feature/nodelist/routerinfo_st.h" #include "core/or/socks_request_st.h" +#include "core/or/congestion_control_flow.h" + /** * On Windows and Linux we cannot reliably bind() a socket to an * address and port if: 1) There's already a socket bound to wildcard @@ -250,13 +253,13 @@ CONST_TO_LISTENER_CONN(const connection_t *c) } size_t -connection_get_inbuf_len(connection_t *conn) +connection_get_inbuf_len(const connection_t *conn) { return conn->inbuf ? 
buf_datalen(conn->inbuf) : 0; } size_t -connection_get_outbuf_len(connection_t *conn) +connection_get_outbuf_len(const connection_t *conn) { return conn->outbuf ? buf_datalen(conn->outbuf) : 0; } @@ -612,6 +615,11 @@ entry_connection_new(int type, int socket_family) entry_conn->entry_cfg.ipv4_traffic = 1; else if (socket_family == AF_INET6) entry_conn->entry_cfg.ipv6_traffic = 1; + + /* Initialize the read token bucket to the maximum value which is the same as + * no rate limiting. */ + token_bucket_rw_init(&ENTRY_TO_EDGE_CONN(entry_conn)->bucket, INT32_MAX, + INT32_MAX, monotime_coarse_get_stamp()); return entry_conn; } @@ -623,6 +631,10 @@ edge_connection_new(int type, int socket_family) edge_connection_t *edge_conn = tor_malloc_zero(sizeof(edge_connection_t)); tor_assert(type == CONN_TYPE_EXIT); connection_init(time(NULL), TO_CONN(edge_conn), type, socket_family); + /* Initialize the read token bucket to the maximum value which is the same as + * no rate limiting. */ + token_bucket_rw_init(&edge_conn->bucket, INT32_MAX, INT32_MAX, + monotime_coarse_get_stamp()); return edge_conn; } @@ -1261,7 +1273,7 @@ socket_failed_from_resource_exhaustion(void) */ if (get_max_sockets() > 65535) { /* TCP port exhaustion */ - rep_hist_note_overload(OVERLOAD_GENERAL); + rep_hist_note_tcp_exhaustion(); } else { /* File descriptor exhaustion */ rep_hist_note_overload(OVERLOAD_FD_EXHAUSTED); @@ -3457,6 +3469,19 @@ connection_bucket_read_limit(connection_t *conn, time_t now) base = get_cell_network_size(or_conn->wide_circ_ids); } + /* Edge connection have their own read bucket due to flow control being able + * to set a rate limit for them. However, for exit connections, we still need + * to honor the global bucket as well. */ + if (CONN_IS_EDGE(conn)) { + const edge_connection_t *edge_conn = CONST_TO_EDGE_CONN(conn); + conn_bucket = token_bucket_rw_get_read(&edge_conn->bucket); + if (conn->type == CONN_TYPE_EXIT) { + /* Decide between our limit and the global one. 
*/ + goto end; + } + return conn_bucket; + } + if (!connection_is_rate_limited(conn)) { /* be willing to read on local conns even if our buckets are empty */ return conn_bucket>=0 ? conn_bucket : 1<<14; @@ -3467,6 +3492,7 @@ connection_bucket_read_limit(connection_t *conn, time_t now) global_bucket_val = MIN(global_bucket_val, relayed); } + end: return connection_bucket_get_share(base, priority, global_bucket_val, conn_bucket); } @@ -3644,6 +3670,13 @@ connection_buckets_decrement(connection_t *conn, time_t now, record_num_bytes_transferred_impl(conn, now, num_read, num_written); + /* Edge connection need to decrement the read side of the bucket used by our + * congestion control. */ + if (CONN_IS_EDGE(conn) && num_read > 0) { + edge_connection_t *edge_conn = TO_EDGE_CONN(conn); + token_bucket_rw_dec(&edge_conn->bucket, num_read, 0); + } + if (!connection_is_rate_limited(conn)) return; /* local IPs are free */ @@ -3697,14 +3730,16 @@ connection_write_bw_exhausted(connection_t *conn, bool is_global_bw) void connection_consider_empty_read_buckets(connection_t *conn) { + int is_global = 1; const char *reason; - if (!connection_is_rate_limited(conn)) + if (CONN_IS_EDGE(conn) && + token_bucket_rw_get_read(&TO_EDGE_CONN(conn)->bucket) <= 0) { + reason = "edge connection read bucket exhausted. Pausing."; + is_global = false; + } else if (!connection_is_rate_limited(conn)) { return; /* Always okay. */ - - int is_global = 1; - - if (token_bucket_rw_get_read(&global_bucket) <= 0) { + } else if (token_bucket_rw_get_read(&global_bucket) <= 0) { reason = "global read bucket exhausted. Pausing."; } else if (connection_counts_as_relayed_traffic(conn, approx_time()) && token_bucket_rw_get_read(&global_relayed_bucket) <= 0) { @@ -3714,8 +3749,9 @@ connection_consider_empty_read_buckets(connection_t *conn) token_bucket_rw_get_read(&TO_OR_CONN(conn)->bucket) <= 0) { reason = "connection read bucket exhausted. 
Pausing."; is_global = false; - } else + } else { return; /* all good, no need to stop it */ + } LOG_FN_CONN(conn, (LOG_DEBUG, LD_NET, "%s", reason)); connection_read_bw_exhausted(conn, is_global); @@ -3819,6 +3855,10 @@ connection_bucket_refill_single(connection_t *conn, uint32_t now_ts) or_connection_t *or_conn = TO_OR_CONN(conn); token_bucket_rw_refill(&or_conn->bucket, now_ts); } + + if (CONN_IS_EDGE(conn)) { + token_bucket_rw_refill(&TO_EDGE_CONN(conn)->bucket, now_ts); + } } /** @@ -4556,9 +4596,9 @@ connection_handle_write_impl(connection_t *conn, int force) !dont_stop_writing) { /* it's done flushing */ if (connection_finished_flushing(conn) < 0) { /* already marked */ - return -1; + goto err; } - return 0; + goto done; } /* Call even if result is 0, since the global write bucket may @@ -4568,7 +4608,17 @@ connection_handle_write_impl(connection_t *conn, int force) if (n_read > 0 && connection_is_reading(conn)) connection_consider_empty_read_buckets(conn); + done: + /* If this is an edge connection with congestion control, check to see + * if it is time to send an xon */ + if (conn_uses_flow_control(conn)) { + flow_control_decide_xon(TO_EDGE_CONN(conn), n_written); + } + return 0; + + err: + return -1; } /* DOCDOC connection_handle_write */ diff --git a/src/core/mainloop/connection.h b/src/core/mainloop/connection.h index 36c94d6570..8b378b15a4 100644 --- a/src/core/mainloop/connection.h +++ b/src/core/mainloop/connection.h @@ -274,8 +274,8 @@ void connection_buf_add_compress(const char *string, size_t len, struct dir_connection_t *conn, int done); void connection_buf_add_buf(struct connection_t *conn, struct buf_t *buf); -size_t connection_get_inbuf_len(struct connection_t *conn); -size_t connection_get_outbuf_len(struct connection_t *conn); +size_t connection_get_inbuf_len(const struct connection_t *conn); +size_t connection_get_outbuf_len(const struct connection_t *conn); struct connection_t *connection_get_by_global_id(uint64_t id); struct connection_t 
*connection_get_by_type(int type); diff --git a/src/core/mainloop/mainloop.c b/src/core/mainloop/mainloop.c index 69606c0d53..cd57dea3d4 100644 --- a/src/core/mainloop/mainloop.c +++ b/src/core/mainloop/mainloop.c @@ -641,6 +641,13 @@ connection_start_reading,(connection_t *conn)) if (connection_should_read_from_linked_conn(conn)) connection_start_reading_from_linked_conn(conn); } else { + if (CONN_IS_EDGE(conn) && TO_EDGE_CONN(conn)->xoff_received) { + /* We should not get called here if we're waiting for an XON, but + * belt-and-suspenders */ + log_notice(LD_NET, + "Request to start reading on an edgeconn blocked with XOFF"); + return; + } if (event_add(conn->read_event, NULL)) log_warn(LD_NET, "Error from libevent setting read event state for %d " "to watched: %s", @@ -1293,6 +1300,7 @@ signewnym_impl(time_t now) circuit_mark_all_dirty_circs_as_unusable(); addressmap_clear_transient(); hs_client_purge_state(); + purge_vanguards_lite(); time_of_last_signewnym = now; signewnym_is_pending = 0; @@ -1370,6 +1378,7 @@ CALLBACK(save_state); CALLBACK(write_stats_file); CALLBACK(control_per_second_events); CALLBACK(second_elapsed); +CALLBACK(manage_vglite); #undef CALLBACK @@ -1392,6 +1401,9 @@ STATIC periodic_event_item_t mainloop_periodic_events[] = { CALLBACK(second_elapsed, NET_PARTICIPANT, FL(RUN_ON_DISABLE)), + /* Update vanguards-lite once per hour, if we have networking */ + CALLBACK(manage_vglite, NET_PARTICIPANT, FL(NEED_NET)), + /* XXXX Do we have a reason to do this on a callback? Does it do any good at * all? For now, if we're dormant, we can let our listeners decay. 
*/ CALLBACK(retry_listeners, NET_PARTICIPANT, FL(NEED_NET)), @@ -1662,6 +1674,21 @@ mainloop_schedule_shutdown(int delay_sec) mainloop_event_schedule(scheduled_shutdown_ev, &delay_tv); } +/** + * Update vanguards-lite layer2 nodes, once every 15 minutes + */ +static int +manage_vglite_callback(time_t now, const or_options_t *options) +{ + (void)now; + (void)options; +#define VANGUARDS_LITE_INTERVAL (15*60) + + maintain_layer2_guards(); + + return VANGUARDS_LITE_INTERVAL; +} + /** Perform regular maintenance tasks. This function gets run once per * second. */ diff --git a/src/core/or/channel.c b/src/core/or/channel.c index c4f3e76fc8..c46fa93e58 100644 --- a/src/core/or/channel.c +++ b/src/core/or/channel.c @@ -2629,24 +2629,42 @@ channel_dump_statistics, (channel_t *chan, int severity)) circuitmux_num_circuits(chan->cmux) : 0); /* Describe timestamps */ - tor_log(severity, LD_GENERAL, - " * Channel %"PRIu64 " was last used by a " - "client at %"PRIu64 " (%"PRIu64 " seconds ago)", - (chan->global_identifier), - (uint64_t)(chan->timestamp_client), - (uint64_t)(now - chan->timestamp_client)); - tor_log(severity, LD_GENERAL, - " * Channel %"PRIu64 " last received a cell " - "at %"PRIu64 " (%"PRIu64 " seconds ago)", - (chan->global_identifier), - (uint64_t)(chan->timestamp_recv), - (uint64_t)(now - chan->timestamp_recv)); - tor_log(severity, LD_GENERAL, - " * Channel %"PRIu64 " last transmitted a cell " - "at %"PRIu64 " (%"PRIu64 " seconds ago)", - (chan->global_identifier), - (uint64_t)(chan->timestamp_xmit), - (uint64_t)(now - chan->timestamp_xmit)); + if (chan->timestamp_client == 0) { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " was never used by a " + "client", (chan->global_identifier)); + } else { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " was last used by a " + "client at %"PRIu64 " (%"PRIu64 " seconds ago)", + (chan->global_identifier), + (uint64_t)(chan->timestamp_client), + (uint64_t)(now - chan->timestamp_client)); + } + if 
(chan->timestamp_recv == 0) { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " never received a cell", + (chan->global_identifier)); + } else { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " last received a cell " + "at %"PRIu64 " (%"PRIu64 " seconds ago)", + (chan->global_identifier), + (uint64_t)(chan->timestamp_recv), + (uint64_t)(now - chan->timestamp_recv)); + } + if (chan->timestamp_xmit == 0) { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " never transmitted a cell", + (chan->global_identifier)); + } else { + tor_log(severity, LD_GENERAL, + " * Channel %"PRIu64 " last transmitted a cell " + "at %"PRIu64 " (%"PRIu64 " seconds ago)", + (chan->global_identifier), + (uint64_t)(chan->timestamp_xmit), + (uint64_t)(now - chan->timestamp_xmit)); + } /* Describe counters and rates */ tor_log(severity, LD_GENERAL, diff --git a/src/core/or/channeltls.c b/src/core/or/channeltls.c index 481dafef91..9db8e2392d 100644 --- a/src/core/or/channeltls.c +++ b/src/core/or/channeltls.c @@ -64,6 +64,7 @@ #include "trunnel/netinfo.h" #include "core/or/channelpadding.h" #include "core/or/extendinfo.h" +#include "core/or/congestion_control_common.h" #include "core/or/cell_st.h" #include "core/or/cell_queue_st.h" @@ -793,7 +794,7 @@ channel_tls_num_cells_writeable_method(channel_t *chan) cell_network_size = get_cell_network_size(tlschan->conn->wide_circ_ids); outbuf_len = connection_get_outbuf_len(TO_CONN(tlschan->conn)); /* Get the number of cells */ - n = CEIL_DIV(OR_CONN_HIGHWATER - outbuf_len, cell_network_size); + n = CEIL_DIV(or_conn_highwatermark() - outbuf_len, cell_network_size); if (n < 0) n = 0; #if SIZEOF_SIZE_T > SIZEOF_INT if (n > INT_MAX) n = INT_MAX; diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 870bcbf7cf..be6429438a 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -22,6 +22,7 @@ struct hs_token_t; struct circpad_machine_spec_t; struct circpad_machine_runtime_t; +struct congestion_control_t; 
/** Number of padding state machines on a circuit. */ #define CIRCPAD_MAX_MACHINES (2) @@ -244,6 +245,9 @@ struct circuit_t { * that STOP commands actually correspond to the current machine, * and not a previous one. */ uint32_t padding_machine_ctr; + + /** Congestion control fields */ + struct congestion_control_t *ccontrol; }; #endif /* !defined(CIRCUIT_ST_H) */ diff --git a/src/core/or/circuitbuild.c b/src/core/or/circuitbuild.c index 2bcc642a97..31e3868b65 100644 --- a/src/core/or/circuitbuild.c +++ b/src/core/or/circuitbuild.c @@ -1359,7 +1359,9 @@ route_len_for_purpose(uint8_t purpose, extend_info_t *exit_ei) int routelen = DEFAULT_ROUTE_LEN; int known_purpose = 0; - if (circuit_should_use_vanguards(purpose)) { + /* If we're using L3 vanguards, we need longer paths for onion services */ + if (circuit_purpose_is_hidden_service(purpose) && + get_options()->HSLayer3Nodes) { /* Clients want an extra hop for rends to avoid linkability. * Services want it for intro points to avoid publishing their * layer3 guards. They want it for hsdir posts to use @@ -1374,14 +1376,6 @@ route_len_for_purpose(uint8_t purpose, extend_info_t *exit_ei) purpose == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO) return routelen+1; - /* If we only have Layer2 vanguards, then we do not need - * the extra hop for linkabilty reasons (see below). - * This means all hops can be of the form: - * S/C - G - L2 - M - R/HSDir/I - */ - if (get_options()->HSLayer2Nodes && !get_options()->HSLayer3Nodes) - return routelen+1; - /* For connections to hsdirs, clients want two extra hops * when using layer3 guards, to avoid linkability. * Same goes for intro points. Note that the route len @@ -1400,16 +1394,14 @@ route_len_for_purpose(uint8_t purpose, extend_info_t *exit_ei) return routelen; switch (purpose) { - /* These two purposes connect to a router that we chose, so - * DEFAULT_ROUTE_LEN is safe. 
*/ - case CIRCUIT_PURPOSE_S_ESTABLISH_INTRO: - /* hidden service connecting to introduction point */ + /* These purposes connect to a router that we chose, so DEFAULT_ROUTE_LEN + * is safe: */ case CIRCUIT_PURPOSE_TESTING: /* router reachability testing */ known_purpose = 1; break; - /* These three purposes connect to a router that someone else + /* These purposes connect to a router that someone else * might have chosen, so add an extra hop to protect anonymity. */ case CIRCUIT_PURPOSE_C_GENERAL: case CIRCUIT_PURPOSE_C_HSDIR_GET: @@ -1419,6 +1411,9 @@ route_len_for_purpose(uint8_t purpose, extend_info_t *exit_ei) /* client connecting to introduction point */ case CIRCUIT_PURPOSE_S_CONNECT_REND: /* hidden service connecting to rendezvous point */ + case CIRCUIT_PURPOSE_S_ESTABLISH_INTRO: + /* hidden service connecting to intro point. In this case we want an extra + hop to avoid linkability attacks by the introduction point. */ known_purpose = 1; routelen++; break; @@ -2019,7 +2014,7 @@ cpath_build_state_to_crn_ipv6_extend_flag(const cpath_build_state_t *state, } /** Decide a suitable length for circ's cpath, and pick an exit - * router (or use <b>exit</b> if provided). Store these in the + * router (or use <b>exit_ei</b> if provided). Store these in the * cpath. * * If <b>is_hs_v3_rp_circuit</b> is set, then this exit should be suitable to @@ -2072,7 +2067,7 @@ onion_pick_cpath_exit(origin_circuit_t *circ, extend_info_t *exit_ei, return 0; } -/** Give <b>circ</b> a new exit destination to <b>exit</b>, and add a +/** Give <b>circ</b> a new exit destination to <b>exit_ei</b>, and add a * hop to the cpath reflecting this. Don't send the next extend cell -- * the caller will do this if it wants to. */ @@ -2114,8 +2109,6 @@ circuit_extend_to_new_exit(origin_circuit_t *circ, extend_info_t *exit_ei) return -1; } - // XXX: Should cannibalized circuits be dirty or not? Not easy to say.. 
- return 0; } @@ -2261,8 +2254,14 @@ middle_node_must_be_vanguard(const or_options_t *options, return 0; } - /* If we have sticky L2 nodes, and this is an L2 pick, use vanguards */ - if (options->HSLayer2Nodes && cur_len == 1) { + /* Don't even bother if the feature is disabled */ + if (!vanguards_lite_is_enabled()) { + return 0; + } + + /* If we are a hidden service circuit, always use either vanguards-lite + * or HSLayer2Nodes for 2nd hop. */ + if (cur_len == 1) { return 1; } @@ -2286,7 +2285,8 @@ pick_vanguard_middle_node(const or_options_t *options, /* Pick the right routerset based on the current hop */ if (cur_len == 1) { - vanguard_routerset = options->HSLayer2Nodes; + vanguard_routerset = options->HSLayer2Nodes ? + options->HSLayer2Nodes : get_layer2_guards(); } else if (cur_len == 2) { vanguard_routerset = options->HSLayer3Nodes; } else { @@ -2295,6 +2295,10 @@ pick_vanguard_middle_node(const or_options_t *options, return NULL; } + if (BUG(!vanguard_routerset)) { + return NULL; + } + node = pick_restricted_middle_node(flags, vanguard_routerset, options->ExcludeNodes, excluded, cur_len+1); diff --git a/src/core/or/circuitlist.c b/src/core/or/circuitlist.c index 4f62284e29..4dbf4d4549 100644 --- a/src/core/or/circuitlist.c +++ b/src/core/or/circuitlist.c @@ -64,6 +64,7 @@ #include "core/or/circuitpadding.h" #include "core/or/crypt_path.h" #include "core/or/extendinfo.h" +#include "core/or/status.h" #include "core/or/trace_probes_circuit.h" #include "core/mainloop/connection.h" #include "app/config/config.h" @@ -100,6 +101,7 @@ #include "lib/compress/compress_zlib.h" #include "lib/compress/compress_zstd.h" #include "lib/buf/buffers.h" +#include "core/or/congestion_control_common.h" #include "core/or/ocirc_event.h" @@ -1143,6 +1145,8 @@ circuit_free_(circuit_t *circ) * hs identifier is freed. 
*/ hs_circ_cleanup_on_free(circ); + congestion_control_free(circ->ccontrol); + if (CIRCUIT_IS_ORIGIN(circ)) { origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); mem = ocirc; @@ -2343,6 +2347,12 @@ circuit_about_to_free(circuit_t *circ) circuitmux_detach_circuit(or_circ->p_chan->cmux, circ); circuit_set_p_circid_chan(or_circ, 0, NULL); } + + if (or_circ->n_cells_discarded_at_end) { + time_t age = approx_time() - circ->timestamp_created.tv_sec; + note_circ_closed_for_unrecognized_cells( + age, or_circ->n_cells_discarded_at_end); + } } else { origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); edge_connection_t *conn; @@ -2586,8 +2596,10 @@ conns_compare_by_buffer_age_(const void **a_, const void **b_) /** We're out of memory for cells, having allocated <b>current_allocation</b> * bytes' worth. Kill the 'worst' circuits until we're under - * FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. */ -void + * FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. + * + * Return the number of bytes removed. 
*/ +size_t circuits_handle_oom(size_t current_allocation) { smartlist_t *circlist; @@ -2597,6 +2609,7 @@ circuits_handle_oom(size_t current_allocation) size_t mem_recovered=0; int n_circuits_killed=0; int n_dirconns_killed=0; + int n_edgeconns_killed = 0; uint32_t now_ts; log_notice(LD_GENERAL, "We're low on memory (cell queues total alloc:" " %"TOR_PRIuSZ" buffer total alloc: %" TOR_PRIuSZ "," @@ -2613,12 +2626,11 @@ circuits_handle_oom(size_t current_allocation) tor_zstd_get_total_allocation(), tor_lzma_get_total_allocation(), hs_cache_get_total_allocation()); - { size_t mem_target = (size_t)(get_options()->MaxMemInQueues * FRACTION_OF_DATA_TO_RETAIN_ON_OOM); if (current_allocation <= mem_target) - return; + return 0; mem_to_recover = current_allocation - mem_target; } @@ -2664,12 +2676,19 @@ circuits_handle_oom(size_t current_allocation) if (conn_age < circ->age_tmp) { break; } - if (conn->type == CONN_TYPE_DIR && conn->linked_conn == NULL) { + /* Also consider edge connections so we don't accumulate bytes on the + * outbuf due to a malicious destination holding off the read on us. */ + if ((conn->type == CONN_TYPE_DIR && conn->linked_conn == NULL) || + CONN_IS_EDGE(conn)) { if (!conn->marked_for_close) connection_mark_for_close(conn); mem_recovered += single_conn_free_bytes(conn); - ++n_dirconns_killed; + if (conn->type == CONN_TYPE_DIR) { + ++n_dirconns_killed; + } else { + ++n_edgeconns_killed; + } if (mem_recovered >= mem_to_recover) goto done_recovering_mem; @@ -2697,14 +2716,16 @@ circuits_handle_oom(size_t current_allocation) } SMARTLIST_FOREACH_END(circ); done_recovering_mem: - log_notice(LD_GENERAL, "Removed %"TOR_PRIuSZ" bytes by killing %d circuits; " "%d circuits remain alive. Also killed %d non-linked directory " - "connections.", + "connections. 
Killed %d edge connections", mem_recovered, n_circuits_killed, smartlist_len(circlist) - n_circuits_killed, - n_dirconns_killed); + n_dirconns_killed, + n_edgeconns_killed); + + return mem_recovered; } /** Verify that circuit <b>c</b> has all of its invariants diff --git a/src/core/or/circuitlist.h b/src/core/or/circuitlist.h index f5791d7c12..147e2cb2f8 100644 --- a/src/core/or/circuitlist.h +++ b/src/core/or/circuitlist.h @@ -232,7 +232,7 @@ int circuit_count_pending_on_channel(channel_t *chan); MOCK_DECL(void, assert_circuit_ok,(const circuit_t *c)); void circuit_free_all(void); -void circuits_handle_oom(size_t current_allocation); +size_t circuits_handle_oom(size_t current_allocation); void circuit_clear_testing_cell_stats(circuit_t *circ); diff --git a/src/core/or/circuitmux_ewma.c b/src/core/or/circuitmux_ewma.c index 0382e62f75..adf256ab05 100644 --- a/src/core/or/circuitmux_ewma.c +++ b/src/core/or/circuitmux_ewma.c @@ -45,7 +45,10 @@ /*** EWMA parameter #defines ***/ /** How long does a tick last (seconds)? */ -#define EWMA_TICK_LEN 10 +#define EWMA_TICK_LEN_DEFAULT 10 +#define EWMA_TICK_LEN_MIN 1 +#define EWMA_TICK_LEN_MAX 600 +static int ewma_tick_len = EWMA_TICK_LEN_DEFAULT; /** The default per-tick scale factor, if it hasn't been overridden by a * consensus or a configuration setting. zero means "disabled". 
*/ @@ -148,7 +151,7 @@ cell_ewma_get_tick(void) monotime_coarse_get(&now); int32_t msec_diff = monotime_coarse_diff_msec32(&start_of_current_tick, &now); - return current_tick_num + msec_diff / (1000*EWMA_TICK_LEN); + return current_tick_num + msec_diff / (1000*ewma_tick_len); } /** @@ -527,15 +530,15 @@ cell_ewma_get_current_tick_and_fraction(double *remainder_out) monotime_coarse_get(&now); int32_t msec_diff = monotime_coarse_diff_msec32(&start_of_current_tick, &now); - if (msec_diff > (1000*EWMA_TICK_LEN)) { - unsigned ticks_difference = msec_diff / (1000*EWMA_TICK_LEN); + if (msec_diff > (1000*ewma_tick_len)) { + unsigned ticks_difference = msec_diff / (1000*ewma_tick_len); monotime_coarse_add_msec(&start_of_current_tick, &start_of_current_tick, - ticks_difference * 1000 * EWMA_TICK_LEN); + ticks_difference * 1000 * ewma_tick_len); current_tick_num += ticks_difference; - msec_diff %= 1000*EWMA_TICK_LEN; + msec_diff %= 1000*ewma_tick_len; } - *remainder_out = ((double)msec_diff) / (1.0e3 * EWMA_TICK_LEN); + *remainder_out = ((double)msec_diff) / (1.0e3 * ewma_tick_len); return current_tick_num; } @@ -605,15 +608,20 @@ cmux_ewma_set_options(const or_options_t *options, /* Both options and consensus can be NULL. This assures us to either get a * valid configured value or the default one. */ halflife = get_circuit_priority_halflife(options, consensus, &source); + ewma_tick_len = networkstatus_get_param(consensus, + "CircuitPriorityTickSecs", + EWMA_TICK_LEN_DEFAULT, + EWMA_TICK_LEN_MIN, + EWMA_TICK_LEN_MAX); /* convert halflife into halflife-per-tick. */ - halflife /= EWMA_TICK_LEN; + halflife /= ewma_tick_len; /* compute per-tick scale factor. 
*/ ewma_scale_factor = exp(LOG_ONEHALF / halflife); log_info(LD_OR, "Enabled cell_ewma algorithm because of value in %s; " "scale factor is %f per %d seconds", - source, ewma_scale_factor, EWMA_TICK_LEN); + source, ewma_scale_factor, ewma_tick_len); } /** Return the multiplier necessary to convert the value of a cell sent in diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 6dfe94de01..99dc5f9d83 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -2967,6 +2967,8 @@ signed_error_t circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) { int retval = 0; + /* Should we send back a STOP cell? */ + bool respond_with_stop = true; circpad_negotiate_t *negotiate; if (CIRCUIT_IS_ORIGIN(circ)) { @@ -2992,6 +2994,12 @@ circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) negotiate->machine_type, negotiate->machine_ctr); goto done; } + + /* If we reached this point we received a STOP command from an old or + unknown machine. Don't reply with our own STOP since there is no one to + handle it on the other end */ + respond_with_stop = false; + if (negotiate->machine_ctr <= circ->padding_machine_ctr) { log_info(LD_CIRC, "Received STOP command for old machine %u, ctr %u", negotiate->machine_type, negotiate->machine_ctr); @@ -3023,10 +3031,13 @@ circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) retval = -1; done: - circpad_padding_negotiated(circ, negotiate->machine_type, - negotiate->command, - (retval == 0) ? CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR, - negotiate->machine_ctr); + if (respond_with_stop) { + circpad_padding_negotiated(circ, negotiate->machine_type, + negotiate->command, + (retval == 0) ? 
CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR, + negotiate->machine_ctr); + } + circpad_negotiate_free(negotiate); return retval; diff --git a/src/core/or/circuituse.c b/src/core/or/circuituse.c index 044b30b8b3..2ec391eca0 100644 --- a/src/core/or/circuituse.c +++ b/src/core/or/circuituse.c @@ -1204,25 +1204,6 @@ needs_circuits_for_build(int num) return 0; } -/** - * Launch the appropriate type of predicted circuit for hidden - * services, depending on our options. - */ -static void -circuit_launch_predicted_hs_circ(int flags) -{ - /* K.I.S.S. implementation of bug #23101: If we are using - * vanguards or pinned middles, pre-build a specific purpose - * for HS circs. */ - if (circuit_should_use_vanguards(CIRCUIT_PURPOSE_HS_VANGUARDS)) { - circuit_launch(CIRCUIT_PURPOSE_HS_VANGUARDS, flags); - } else { - /* If no vanguards, then no HS-specific prebuilt circuits are needed. - * Normal GENERAL circs are fine */ - circuit_launch(CIRCUIT_PURPOSE_C_GENERAL, flags); - } -} - /** Determine how many circuits we have open that are clean, * Make sure it's enough for all the upcoming behaviors we predict we'll have. * But put an upper bound on the total number of circuits. @@ -1276,7 +1257,7 @@ circuit_predict_and_launch_new(void) "Have %d clean circs (%d internal), need another internal " "circ for my hidden service.", num, num_internal); - circuit_launch_predicted_hs_circ(flags); + circuit_launch(CIRCUIT_PURPOSE_HS_VANGUARDS, flags); return; } @@ -1295,7 +1276,10 @@ circuit_predict_and_launch_new(void) " another hidden service circ.", num, num_uptime_internal, num_internal); - circuit_launch_predicted_hs_circ(flags); + /* Always launch vanguards purpose circuits for HS clients, + * for vanguards-lite. This prevents us from cannibalizing + * to build these circuits (and thus not use vanguards). 
*/ + circuit_launch(CIRCUIT_PURPOSE_HS_VANGUARDS, flags); return; } @@ -2022,16 +2006,12 @@ circuit_is_hs_v3(const circuit_t *circ) int circuit_should_use_vanguards(uint8_t purpose) { - const or_options_t *options = get_options(); - - /* Only hidden service circuits use vanguards */ - if (!circuit_purpose_is_hidden_service(purpose)) - return 0; - - /* Pinned middles are effectively vanguards */ - if (options->HSLayer2Nodes || options->HSLayer3Nodes) + /* All hidden service circuits use either vanguards or + * vanguards-lite. */ + if (circuit_purpose_is_hidden_service(purpose)) return 1; + /* Everything else is a normal circuit */ return 0; } @@ -2069,13 +2049,11 @@ circuit_should_cannibalize_to_build(uint8_t purpose_to_build, return 0; } - /* For vanguards, the server-side intro circ is not cannibalized - * because we pre-build 4 hop HS circuits, and it only needs a 3 hop - * circuit. It is also long-lived, so it is more important that - * it have lower latency than get built fast. + /* The server-side intro circ is not cannibalized because it only + * needs a 3 hop circuit. It is also long-lived, so it is more + * important that it have lower latency than get built fast. */ - if (circuit_should_use_vanguards(purpose_to_build) && - purpose_to_build == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO) { + if (purpose_to_build == CIRCUIT_PURPOSE_S_ESTABLISH_INTRO) { return 0; } diff --git a/src/core/or/command.c b/src/core/or/command.c index 622217a78e..40eb1554c0 100644 --- a/src/core/or/command.c +++ b/src/core/or/command.c @@ -563,7 +563,7 @@ command_process_relay_cell(cell_t *cell, channel_t *chan) } if ((reason = circuit_receive_relay_cell(cell, circ, direction)) < 0) { - log_fn(LOG_PROTOCOL_WARN,LD_PROTOCOL,"circuit_receive_relay_cell " + log_fn(LOG_DEBUG,LD_PROTOCOL,"circuit_receive_relay_cell " "(%s) failed. 
Closing.", direction==CELL_DIRECTION_OUT?"forward":"backward"); /* Always emit a bandwidth event for closed circs */ diff --git a/src/core/or/congestion_control_common.c b/src/core/or/congestion_control_common.c new file mode 100644 index 0000000000..0919f037db --- /dev/null +++ b/src/core/or/congestion_control_common.c @@ -0,0 +1,1038 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_common.c + * \brief Common code used by all congestion control algorithms. + */ + +#define TOR_CONGESTION_CONTROL_COMMON_PRIVATE + +#include "core/or/or.h" + +#include "core/or/circuitlist.h" +#include "core/or/crypt_path.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" +#include "core/or/channel.h" +#include "core/mainloop/connection.h" +#include "core/or/sendme.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_vegas.h" +#include "core/or/congestion_control_nola.h" +#include "core/or/congestion_control_westwood.h" +#include "core/or/congestion_control_st.h" +#include "core/or/trace_probes_cc.h" +#include "lib/time/compat_time.h" +#include "feature/nodelist/networkstatus.h" + +/* Consensus parameter defaults. + * + * More details for each of the parameters can be found in proposal 324, + * section 6.5 including tuning notes. */ +#define CIRCWINDOW_INIT (500) +#define SENDME_INC_DFLT (50) + +#define CWND_INC_DFLT (50) +#define CWND_INC_PCT_SS_DFLT (100) +#define CWND_INC_RATE_DFLT (1) +#define CWND_MAX_DFLT (INT32_MAX) +#define CWND_MIN_DFLT (MAX(100, SENDME_INC_DFLT)) + +#define BWE_SENDME_MIN_DFLT (5) +#define EWMA_CWND_COUNT_DFLT (2) + +/* BDP algorithms for each congestion control algorithms use the piecewise + * estimattor. See section 3.1.4 of proposal 324. 
*/ +#define WESTWOOD_BDP_ALG BDP_ALG_PIECEWISE +#define VEGAS_BDP_MIX_ALG BDP_ALG_PIECEWISE +#define NOLA_BDP_ALG BDP_ALG_PIECEWISE + +/* Indicate OR connection buffer limitations used to stop or start accepting + * cells in its outbuf. + * + * These watermarks are historical to tor in a sense that they've been used + * almost from the genesis point. And were likely defined to fit the bounds of + * TLS records of 16KB which would be around 32 cells. + * + * These are defaults of the consensus parameter "orconn_high" and "orconn_low" + * values. */ +#define OR_CONN_HIGHWATER_DFLT (32*1024) +#define OR_CONN_LOWWATER_DFLT (16*1024) + +/* Low and high values of circuit cell queue sizes. They are used to tell when + * to start or stop reading on the streams attached on the circuit. + * + * These are defaults of the consensus parameters "cellq_high" and "cellq_low". + */ +#define CELL_QUEUE_LOW_DFLT (10) +#define CELL_QUEUE_HIGH_DFLT (256) + +static uint64_t congestion_control_update_circuit_rtt(congestion_control_t *, + uint64_t); +static bool congestion_control_update_circuit_bdp(congestion_control_t *, + const circuit_t *, + const crypt_path_t *, + uint64_t, uint64_t); + +/* Consensus parameters cached. The non static ones are extern. */ +static uint32_t cwnd_max = CWND_MAX_DFLT; +int32_t cell_queue_high = CELL_QUEUE_HIGH_DFLT; +int32_t cell_queue_low = CELL_QUEUE_LOW_DFLT; +uint32_t or_conn_highwater = OR_CONN_HIGHWATER_DFLT; +uint32_t or_conn_lowwater = OR_CONN_LOWWATER_DFLT; + +/** + * Update global congestion control related consensus parameter values, + * every consensus update. 
+ */ +void +congestion_control_new_consensus_params(const networkstatus_t *ns) +{ +#define CELL_QUEUE_HIGH_MIN (1) +#define CELL_QUEUE_HIGH_MAX (1000) + cell_queue_high = networkstatus_get_param(ns, "cellq_high", + CELL_QUEUE_HIGH_DFLT, + CELL_QUEUE_HIGH_MIN, + CELL_QUEUE_HIGH_MAX); + +#define CELL_QUEUE_LOW_MIN (1) +#define CELL_QUEUE_LOW_MAX (1000) + cell_queue_low = networkstatus_get_param(ns, "cellq_low", + CELL_QUEUE_LOW_DFLT, + CELL_QUEUE_LOW_MIN, + CELL_QUEUE_LOW_MAX); + +#define OR_CONN_HIGHWATER_MIN (CELL_PAYLOAD_SIZE) +#define OR_CONN_HIGHWATER_MAX (INT32_MAX) + or_conn_highwater = + networkstatus_get_param(ns, "orconn_high", + OR_CONN_HIGHWATER_DFLT, + OR_CONN_HIGHWATER_MIN, + OR_CONN_HIGHWATER_MAX); + +#define OR_CONN_LOWWATER_MIN (CELL_PAYLOAD_SIZE) +#define OR_CONN_LOWWATER_MAX (INT32_MAX) + or_conn_lowwater = + networkstatus_get_param(ns, "orconn_low", + OR_CONN_LOWWATER_DFLT, + OR_CONN_LOWWATER_MIN, + OR_CONN_LOWWATER_MAX); + +#define CWND_MAX_MIN 500 +#define CWND_MAX_MAX (INT32_MAX) + cwnd_max = + networkstatus_get_param(NULL, "cc_cwnd_max", + CWND_MAX_DFLT, + CWND_MAX_MIN, + CWND_MAX_MAX); +} + +/** + * Set congestion control parameters on a circuit's congestion + * control object based on values from the consensus. + * + * cc_alg is the negotiated congestion control algorithm. + * + * sendme_inc is the number of packaged cells that a sendme cell + * acks. This parameter will come from circuit negotiation. 
+ */ +static void +congestion_control_init_params(congestion_control_t *cc, + cc_alg_t cc_alg, + int sendme_inc) +{ +#define CWND_INIT_MIN 100 +#define CWND_INIT_MAX (10000) + cc->cwnd = + networkstatus_get_param(NULL, "cc_cwnd_init", + CIRCWINDOW_INIT, + CWND_INIT_MIN, + CWND_INIT_MAX); + +#define CWND_INC_PCT_SS_MIN 1 +#define CWND_INC_PCT_SS_MAX (500) + cc->cwnd_inc_pct_ss = + networkstatus_get_param(NULL, "cc_cwnd_inc_pct_ss", + CWND_INC_PCT_SS_DFLT, + CWND_INC_PCT_SS_MIN, + CWND_INC_PCT_SS_MAX); + +#define CWND_INC_MIN 1 +#define CWND_INC_MAX (1000) + cc->cwnd_inc = + networkstatus_get_param(NULL, "cc_cwnd_inc", + CWND_INC_DFLT, + CWND_INC_MIN, + CWND_INC_MAX); + +#define CWND_INC_RATE_MIN 1 +#define CWND_INC_RATE_MAX (250) + cc->cwnd_inc_rate = + networkstatus_get_param(NULL, "cc_cwnd_inc_rate", + CWND_INC_RATE_DFLT, + CWND_INC_RATE_MIN, + CWND_INC_RATE_MAX); + +#define SENDME_INC_MIN 10 +#define SENDME_INC_MAX (1000) + cc->sendme_inc = + networkstatus_get_param(NULL, "cc_sendme_inc", + sendme_inc, + SENDME_INC_MIN, + SENDME_INC_MAX); + + // XXX: this min needs to abide by sendme_inc range rules somehow +#define CWND_MIN_MIN sendme_inc +#define CWND_MIN_MAX (1000) + cc->cwnd_min = + networkstatus_get_param(NULL, "cc_cwnd_min", + CWND_MIN_DFLT, + CWND_MIN_MIN, + CWND_MIN_MAX); + +#define EWMA_CWND_COUNT_MIN 1 +#define EWMA_CWND_COUNT_MAX (100) + cc->ewma_cwnd_cnt = + networkstatus_get_param(NULL, "cc_ewma_cwnd_cnt", + EWMA_CWND_COUNT_DFLT, + EWMA_CWND_COUNT_MIN, + EWMA_CWND_COUNT_MAX); + +#define BWE_SENDME_MIN_MIN 2 +#define BWE_SENDME_MIN_MAX (20) + cc->bwe_sendme_min = + networkstatus_get_param(NULL, "cc_bwe_min", + BWE_SENDME_MIN_DFLT, + BWE_SENDME_MIN_MIN, + BWE_SENDME_MIN_MAX); + +#define CC_ALG_MIN 0 +#define CC_ALG_MAX (NUM_CC_ALGS-1) + cc->cc_alg = + networkstatus_get_param(NULL, "cc_alg", + cc_alg, + CC_ALG_MIN, + CC_ALG_MAX); + + bdp_alg_t default_bdp_alg = 0; + + switch (cc->cc_alg) { + case CC_ALG_WESTWOOD: + default_bdp_alg = WESTWOOD_BDP_ALG; + 
break; + case CC_ALG_VEGAS: + default_bdp_alg = VEGAS_BDP_MIX_ALG; + break; + case CC_ALG_NOLA: + default_bdp_alg = NOLA_BDP_ALG; + break; + case CC_ALG_SENDME: + default: + tor_fragile_assert(); + return; // No alg-specific params + } + + cc->bdp_alg = + networkstatus_get_param(NULL, "cc_bdp_alg", + default_bdp_alg, + 0, + NUM_BDP_ALGS-1); + + /* Algorithm-specific parameters */ + if (cc->cc_alg == CC_ALG_WESTWOOD) { + congestion_control_westwood_set_params(cc); + } else if (cc->cc_alg == CC_ALG_VEGAS) { + congestion_control_vegas_set_params(cc); + } else if (cc->cc_alg == CC_ALG_NOLA) { + congestion_control_nola_set_params(cc); + } +} + +/** + * Allocate and initialize fields in congestion control object. + * + * cc_alg is the negotiated congestion control algorithm. + * + * sendme_inc is the number of packaged cells that a sendme cell + * acks. This parameter will come from circuit negotiation. + */ +static void +congestion_control_init(congestion_control_t *cc, cc_alg_t cc_alg, + int sendme_inc) +{ + cc->sendme_pending_timestamps = smartlist_new(); + cc->sendme_arrival_timestamps = smartlist_new(); + + cc->in_slow_start = 1; + congestion_control_init_params(cc, cc_alg, sendme_inc); + + cc->next_cc_event = CWND_UPDATE_RATE(cc); +} + +/** Allocate and initialize a new congestion control object */ +congestion_control_t * +congestion_control_new(void) +{ + congestion_control_t *cc = tor_malloc_zero(sizeof(congestion_control_t)); + + // XXX: the alg and the sendme_inc need to be negotiated during + // circuit handshake + congestion_control_init(cc, CC_ALG_VEGAS, SENDME_INC_DFLT); + + return cc; +} + +/** + * Free a congestion control object and its associated state. 
+ */ +void +congestion_control_free_(congestion_control_t *cc) +{ + if (!cc) + return; + + SMARTLIST_FOREACH(cc->sendme_pending_timestamps, uint64_t *, t, tor_free(t)); + SMARTLIST_FOREACH(cc->sendme_arrival_timestamps, uint64_t *, t, tor_free(t)); + smartlist_free(cc->sendme_pending_timestamps); + smartlist_free(cc->sendme_arrival_timestamps); + + tor_free(cc); +} + +/** + * Enqueue a u64 timestamp to the end of a queue of timestamps. + */ +static inline void +enqueue_timestamp(smartlist_t *timestamps_u64, uint64_t timestamp_usec) +{ + uint64_t *timestamp_ptr = tor_malloc(sizeof(uint64_t)); + *timestamp_ptr = timestamp_usec; + + smartlist_add(timestamps_u64, timestamp_ptr); +} + +/** + * Peek at the head of a smartlist queue of u64 timestamps. + */ +static inline uint64_t +peek_timestamp(const smartlist_t *timestamps_u64_usecs) +{ + uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0); + + if (BUG(!timestamp_ptr)) { + log_err(LD_CIRC, "Congestion control timestamp list became empty!"); + return 0; + } + + return *timestamp_ptr; +} + +/** + * Dequeue a u64 monotime usec timestamp from the front of a + * smartlist of pointers to u64. + */ +static inline uint64_t +dequeue_timestamp(smartlist_t *timestamps_u64_usecs) +{ + uint64_t *timestamp_ptr = smartlist_get(timestamps_u64_usecs, 0); + uint64_t timestamp_u64; + + if (BUG(!timestamp_ptr)) { + log_err(LD_CIRC, "Congestion control timestamp list became empty!"); + return 0; + } + + timestamp_u64 = *timestamp_ptr; + smartlist_del_keeporder(timestamps_u64_usecs, 0); + tor_free(timestamp_ptr); + + return timestamp_u64; +} + +/** + * Returns the number of sendme acks that will be received in the + * current congestion window size, rounded to nearest int. 
+ */ +static inline uint64_t +sendme_acks_per_cwnd(const congestion_control_t *cc) +{ + /* We add half a sendme_inc to cwnd to round to the nearest int */ + return ((cc->cwnd + cc->sendme_inc/2)/cc->sendme_inc); +} + +/** + * Get a package window from either old sendme logic, or congestion control. + * + * A package window is how many cells you can still send. + */ +int +congestion_control_get_package_window(const circuit_t *circ, + const crypt_path_t *cpath) +{ + int package_window; + congestion_control_t *cc; + + tor_assert(circ); + + if (cpath) { + package_window = cpath->package_window; + cc = cpath->ccontrol; + } else { + package_window = circ->package_window; + cc = circ->ccontrol; + } + + if (!cc) { + return package_window; + } else { + /* Inflight can be above cwnd if cwnd was just reduced */ + if (cc->inflight > cc->cwnd) + return 0; + /* In the extremely unlikely event that cwnd-inflight is larger than + * INT32_MAX, just return that cap, so old code doesn't explode. */ + else if (cc->cwnd - cc->inflight > INT32_MAX) + return INT32_MAX; + else + return (int)(cc->cwnd - cc->inflight); + } +} + +/** + * Returns the number of cells that are acked by every sendme. + */ +int +sendme_get_inc_count(const circuit_t *circ, const crypt_path_t *layer_hint) +{ + int sendme_inc = CIRCWINDOW_INCREMENT; + congestion_control_t *cc = NULL; + + if (layer_hint) { + cc = layer_hint->ccontrol; + } else { + cc = circ->ccontrol; + } + + if (cc) { + sendme_inc = cc->sendme_inc; + } + + return sendme_inc; +} + +/** Return true iff the next cell we send will result in the other endpoint + * sending a SENDME. + * + * We are able to know that because the package or inflight window value minus + * one cell (the possible SENDME cell) should be a multiple of the + * cells-per-sendme increment value (set via consensus parameter, negotiated + * for the circuit, and passed in as sendme_inc). 
+ * + * This function is used when recording a cell digest and this is done quite + * low in the stack when decrypting or encrypting a cell. The window is only + * updated once the cell is actually put in the outbuf. + */ +bool +circuit_sent_cell_for_sendme(const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + congestion_control_t *cc; + int window; + + tor_assert(circ); + + if (layer_hint) { + window = layer_hint->package_window; + cc = layer_hint->ccontrol; + } else { + window = circ->package_window; + cc = circ->ccontrol; + } + + /* If we are using congestion control and the alg is not + * old-school 'fixed', then use cc->inflight to determine + * when sendmes will be sent */ + if (cc) { + if (!cc->inflight) + return false; + + /* This check must be +1 because this function is called *before* + * inflight is incremented for the sent cell */ + if ((cc->inflight+1) % cc->sendme_inc != 0) + return false; + + return true; + } + + /* At the start of the window, no SENDME will be expected. */ + if (window == CIRCWINDOW_START) { + return false; + } + + /* Are we at the limit of the increment and if not, we don't expect next + * cell is a SENDME. + * + * We test against the window minus 1 because when we are looking if the + * next cell is a SENDME, the window (either package or deliver) hasn't been + * decremented just yet so when this is called, we are currently processing + * the "window - 1" cell. + */ + if (((window - 1) % CIRCWINDOW_INCREMENT) != 0) { + return false; + } + + /* Next cell is expected to be a SENDME. */ + return true; +} + +/** + * Call-in to tell congestion control code that this circuit sent a cell. + * + * This updates the 'inflight' counter, and if this is a cell that will + * cause the other end to send a SENDME, record the current time in a list + * of pending timestamps, so that we can later compute the circuit RTT when + * the SENDME comes back. 
*/ +void +congestion_control_note_cell_sent(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *cpath) +{ + tor_assert(circ); + tor_assert(cc); + + /* Is this the last cell before a SENDME? The idea is that if the + * package_window reaches a multiple of the increment, after this cell, we + * should expect a SENDME. Note that this function must be called *before* + * we account for the sent cell. */ + if (!circuit_sent_cell_for_sendme(circ, cpath)) { + cc->inflight++; + return; + } + + cc->inflight++; + + /* Record this cell time for RTT computation when SENDME arrives */ + enqueue_timestamp(cc->sendme_pending_timestamps, + monotime_absolute_usec()); +} + +/** + * Returns true if any edge connections are active. + * + * We need to know this so that we can stop computing BDP if the + * edges are not sending on the circuit. + */ +static int +circuit_has_active_streams(const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + const edge_connection_t *streams; + + if (CIRCUIT_IS_ORIGIN(circ)) { + streams = CONST_TO_ORIGIN_CIRCUIT(circ)->p_streams; + } else { + streams = CONST_TO_OR_CIRCUIT(circ)->n_streams; + } + + /* Check linked list of streams */ + for (const edge_connection_t *conn = streams; conn != NULL; + conn = conn->next_stream) { + if (conn->base_.marked_for_close) + continue; + + if (!layer_hint || conn->cpath_layer == layer_hint) { + if (connection_get_inbuf_len(TO_CONN(conn)) > 0) { + log_info(LD_CIRC, "CC: More in edge inbuf..."); + return 1; + } + + /* If we did not reach EOF on this read, there's more */ + if (!TO_CONN(conn)->inbuf_reached_eof) { + log_info(LD_CIRC, "CC: More on edge conn..."); + return 1; + } + + if (TO_CONN(conn)->linked_conn) { + if (connection_get_inbuf_len(TO_CONN(conn)->linked_conn) > 0) { + log_info(LD_CIRC, "CC: More in linked inbuf..."); + return 1; + } + + /* If there is a linked conn, and *it* did not each EOF, + * there's more */ + if (!TO_CONN(conn)->linked_conn->inbuf_reached_eof) { + 
log_info(LD_CIRC, "CC: More on linked conn..."); + return 1; + } + } + } + } + + return 0; +} + +/** + * Upon receipt of a SENDME, pop the oldest timestamp off the timestamp + * list, and use this to update RTT. + * + * Returns true if circuit estimates were successfully updated, false + * otherwise. + */ +bool +congestion_control_update_circuit_estimates(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + uint64_t now_usec = monotime_absolute_usec(); + + /* Update RTT first, then BDP. BDP needs fresh RTT */ + uint64_t curr_rtt_usec = congestion_control_update_circuit_rtt(cc, now_usec); + return congestion_control_update_circuit_bdp(cc, circ, layer_hint, now_usec, + curr_rtt_usec); +} + +/** + * Returns true if we have enough time data to use heuristics + * to compare RTT to a baseline. + */ +static bool +time_delta_should_use_heuristics(const congestion_control_t *cc) +{ + + /* If we have exited slow start, we should have processed at least + * a cwnd worth of RTTs */ + if (!cc->in_slow_start) { + return true; + } + + /* If we managed to get enough acks to estimate a SENDME BDP, then + * we have enough to estimate clock jumps relative to a baseline, + * too. (This is at least 'cc_bwe_min' acks). */ + if (cc->bdp[BDP_ALG_SENDME_RATE]) { + return true; + } + + /* Not enough data to estimate clock jumps */ + return false; +} + +static bool is_monotime_clock_broken = false; + +/** + * Returns true if the monotime delta is 0, or is significantly + * different than the previous delta. Either case indicates + * that the monotime time source stalled or jumped. + * + * Also caches the clock state in the is_monotime_clock_broken flag, + * so we can also provide a is_monotime_clock_reliable() function, + * used by flow control rate timing. 
+ */ +static bool +time_delta_stalled_or_jumped(const congestion_control_t *cc, + uint64_t old_delta, uint64_t new_delta) +{ +#define DELTA_DISCREPENCY_RATIO_MAX 100 + /* If we have a 0 new_delta, that is definitely a monotime stall */ + if (new_delta == 0) { + static ratelim_t stall_info_limit = RATELIM_INIT(60); + log_fn_ratelim(&stall_info_limit, LOG_INFO, LD_CIRC, + "Congestion control cannot measure RTT due to monotime stall."); + + /* If delta is ever 0, the monotime clock has stalled, and we should + * not use it anywhere. */ + is_monotime_clock_broken = true; + + return is_monotime_clock_broken; + } + + /* If the old_delta is 0, we have no previous values on this circuit. + * + * So, return the global monotime status from other circuits, and + * do not update. + */ + if (old_delta == 0) { + return is_monotime_clock_broken; + } + + /* + * For the heuristic cases, we need at least a few timestamps, + * to average out any previous partial stalls or jumps. So until + * that point, let's just use the cached status from other circuits. + */ + if (!time_delta_should_use_heuristics(cc)) { + return is_monotime_clock_broken; + } + + /* If old_delta is significantly larger than new_delta, then + * this means that the monotime clock recently stopped moving + * forward. */ + if (old_delta > new_delta * DELTA_DISCREPENCY_RATIO_MAX) { + static ratelim_t dec_notice_limit = RATELIM_INIT(300); + log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC, + "Sudden decrease in circuit RTT (%"PRIu64" vs %"PRIu64 + "), likely due to clock jump.", + new_delta/1000, old_delta/1000); + + is_monotime_clock_broken = true; + + return is_monotime_clock_broken; + } + + /* If new_delta is significantly larger than old_delta, then + * this means that the monotime clock suddenly jumped forward. 
*/ + if (new_delta > old_delta * DELTA_DISCREPENCY_RATIO_MAX) { + static ratelim_t dec_notice_limit = RATELIM_INIT(300); + log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC, + "Sudden increase in circuit RTT (%"PRIu64" vs %"PRIu64 + "), likely due to clock jump.", + new_delta/1000, old_delta/1000); + + is_monotime_clock_broken = true; + + return is_monotime_clock_broken; + } + + /* All good! Update cached status, too */ + is_monotime_clock_broken = false; + + return is_monotime_clock_broken; +} + +/** + * Is the monotime clock stalled according to any circuits? + */ +bool +is_monotime_clock_reliable(void) +{ + return !is_monotime_clock_broken; +} + +/** + * Called when we get a SENDME. Updates circuit RTT by pulling off a + * timestamp of when we sent the CIRCWINDOW_INCREMENT-th cell from + * the queue of such timestamps, and comparing that to current time. + * + * Also updates min, max, and EWMA of RTT. + * + * Returns the current circuit RTT in usecs, or 0 if it could not be + * measured (due to clock jump, stall, etc). + */ +static uint64_t +congestion_control_update_circuit_rtt(congestion_control_t *cc, + uint64_t now_usec) +{ + uint64_t rtt, ewma_cnt; + uint64_t sent_at_timestamp; + + tor_assert(cc); + + /* Get the time that we sent the cell that resulted in the other + * end sending this sendme. Use this to calculate RTT */ + sent_at_timestamp = dequeue_timestamp(cc->sendme_pending_timestamps); + + rtt = now_usec - sent_at_timestamp; + + /* Do not update RTT at all if it looks fishy */ + if (time_delta_stalled_or_jumped(cc, cc->ewma_rtt_usec, rtt)) { + return 0; + } + + ewma_cnt = cc->ewma_cwnd_cnt*sendme_acks_per_cwnd(cc); + ewma_cnt = MAX(ewma_cnt, 2); // Use at least 2 + + cc->ewma_rtt_usec = n_count_ewma(rtt, cc->ewma_rtt_usec, ewma_cnt); + + if (rtt > cc->max_rtt_usec) { + cc->max_rtt_usec = rtt; + } + + if (cc->min_rtt_usec == 0 || rtt < cc->min_rtt_usec) { + cc->min_rtt_usec = rtt; + } + + return rtt; +} + +/** + * Called when we get a SENDME. 
Updates the bandwidth-delay-product (BDP) + * estimates of a circuit. Several methods of computing BDP are used, + * depending on scenario. While some congestion control algorithms only + * use one of these methods, we update them all because it's quick and easy. + * + * - now_usec is the current monotime in usecs. + * - curr_rtt_usec is the current circuit RTT in usecs. It may be 0 if no + * RTT could bemeasured. + * + * Returns true if we were able to update BDP, false otherwise. + */ +static bool +congestion_control_update_circuit_bdp(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint, + uint64_t now_usec, + uint64_t curr_rtt_usec) +{ + int chan_q = 0; + unsigned int blocked_on_chan = 0; + uint64_t timestamp_usec; + uint64_t sendme_rate_bdp = 0; + + tor_assert(cc); + + if (CIRCUIT_IS_ORIGIN(circ)) { + /* origin circs use n_chan */ + chan_q = circ->n_chan_cells.n; + blocked_on_chan = circ->streams_blocked_on_n_chan; + } else { + /* Both onion services and exits use or_circuit and p_chan */ + chan_q = CONST_TO_OR_CIRCUIT(circ)->p_chan_cells.n; + blocked_on_chan = circ->streams_blocked_on_p_chan; + } + + /* If we have no EWMA RTT, it is because monotime has been stalled + * or messed up the entire time so far. Set our BDP estimates directly + * to current cwnd */ + if (!cc->ewma_rtt_usec) { + uint64_t cwnd = cc->cwnd; + + /* If the channel is blocked, keep subtracting off the chan_q + * until we hit the min cwnd. */ + if (blocked_on_chan) { + cwnd = MAX(cwnd - chan_q, cc->cwnd_min); + cc->blocked_chan = 1; + } else { + cc->blocked_chan = 0; + } + + cc->bdp[BDP_ALG_CWND_RTT] = cwnd; + cc->bdp[BDP_ALG_INFLIGHT_RTT] = cwnd; + cc->bdp[BDP_ALG_SENDME_RATE] = cwnd; + cc->bdp[BDP_ALG_PIECEWISE] = cwnd; + + static ratelim_t dec_notice_limit = RATELIM_INIT(300); + log_fn_ratelim(&dec_notice_limit, LOG_NOTICE, LD_CIRC, + "Our clock has been stalled for the entire lifetime of a circuit. 
" + "Performance may be sub-optimal."); + + return blocked_on_chan; + } + + /* Congestion window based BDP will respond to changes in RTT only, and is + * relative to cwnd growth. It is useful for correcting for BDP + * overestimation, but if BDP is higher than the current cwnd, it will + * underestimate it. + * + * We multiply here first to avoid precision issues from min_RTT being + * close to ewma RTT. Since all fields are u64, there is plenty of + * room here to multiply first. + */ + cc->bdp[BDP_ALG_CWND_RTT] = cc->cwnd*cc->min_rtt_usec/cc->ewma_rtt_usec; + + /* + * If we have no pending streams, we do not have enough data to fill + * the BDP, so preserve our old estimates but do not make any more. + */ + if (!blocked_on_chan && !circuit_has_active_streams(circ, layer_hint)) { + log_info(LD_CIRC, + "CC: Streams drained. Spare package window: %"PRIu64 + ", no BDP update", cc->cwnd - cc->inflight); + + /* Clear SENDME timestamps; they will be wrong with intermittent data */ + SMARTLIST_FOREACH(cc->sendme_arrival_timestamps, uint64_t *, t, + tor_free(t)); + smartlist_clear(cc->sendme_arrival_timestamps); + } else if (curr_rtt_usec && is_monotime_clock_reliable()) { + /* Sendme-based BDP will quickly measure BDP in much less than + * a cwnd worth of data when in use (in 2-10 SENDMEs). + * + * But if the link goes idle, it will be vastly lower than true BDP. Hence + * we only compute it if we have either pending stream data, or streams + * are still blocked on the channel queued data. + * + * We also do not compute it if we do not have a current RTT passed in, + * because that means that monotime is currently stalled or just jumped. + */ + enqueue_timestamp(cc->sendme_arrival_timestamps, now_usec); + + if (smartlist_len(cc->sendme_arrival_timestamps) >= cc->bwe_sendme_min) { + /* If we have more sendmes than fit in a cwnd, trim the list. 
+ * Those are not acurrately measuring throughput, if cwnd is + * currently smaller than BDP */ + while (smartlist_len(cc->sendme_arrival_timestamps) > + cc->bwe_sendme_min && + (uint64_t)smartlist_len(cc->sendme_arrival_timestamps) > + sendme_acks_per_cwnd(cc)) { + (void)dequeue_timestamp(cc->sendme_arrival_timestamps); + } + int sendme_cnt = smartlist_len(cc->sendme_arrival_timestamps); + + /* Calculate SENDME_BWE_COUNT pure average */ + timestamp_usec = peek_timestamp(cc->sendme_arrival_timestamps); + uint64_t delta = now_usec - timestamp_usec; + + /* The acked data is in sendme_cnt-1 chunks, because we are counting the + * data that is processed by the other endpoint *between* all of these + * sendmes. There's one less gap between the sendmes than the number + * of sendmes. */ + uint64_t cells = (sendme_cnt-1)*cc->sendme_inc; + + /* The bandwidth estimate is cells/delta, which when multiplied + * by min RTT obtains the BDP. However, we multiply first to + * avoid precision issues with the RTT being close to delta in size. */ + sendme_rate_bdp = cells*cc->min_rtt_usec/delta; + + /* Calculate BDP_EWMA_COUNT N-EWMA */ + cc->bdp[BDP_ALG_SENDME_RATE] = + n_count_ewma(sendme_rate_bdp, cc->bdp[BDP_ALG_SENDME_RATE], + cc->ewma_cwnd_cnt*sendme_acks_per_cwnd(cc)); + } + + /* In-flight BDP will cause the cwnd to drift down when underutilized. + * It is most useful when the local OR conn is blocked, so we only + * compute it if we're utilized. */ + cc->bdp[BDP_ALG_INFLIGHT_RTT] = + (cc->inflight - chan_q)*cc->min_rtt_usec/ + MAX(cc->ewma_rtt_usec, curr_rtt_usec); + } else { + /* We can still update inflight with just an EWMA RTT, but only + * if there is data flowing */ + cc->bdp[BDP_ALG_INFLIGHT_RTT] = + (cc->inflight - chan_q)*cc->min_rtt_usec/cc->ewma_rtt_usec; + } + + /* The orconn is blocked; use smaller of inflight vs SENDME */ + if (blocked_on_chan) { + log_info(LD_CIRC, "CC: Streams blocked on circ channel. 
Chanq: %d", + chan_q); + + /* A blocked channel is an immediate congestion signal, but it still + * happens only once per cwnd */ + if (!cc->blocked_chan) { + cc->next_cc_event = 0; + cc->blocked_chan = 1; + } + + if (cc->bdp[BDP_ALG_SENDME_RATE]) { + cc->bdp[BDP_ALG_PIECEWISE] = MIN(cc->bdp[BDP_ALG_INFLIGHT_RTT], + cc->bdp[BDP_ALG_SENDME_RATE]); + } else { + cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_INFLIGHT_RTT]; + } + } else { + /* If we were previously blocked, emit a new congestion event + * now that we are unblocked, to re-evaluate cwnd */ + if (cc->blocked_chan) { + cc->blocked_chan = 0; + cc->next_cc_event = 0; + log_info(LD_CIRC, "CC: Streams un-blocked on circ channel. Chanq: %d", + chan_q); + } + + cc->bdp[BDP_ALG_PIECEWISE] = MAX(cc->bdp[BDP_ALG_SENDME_RATE], + cc->bdp[BDP_ALG_CWND_RTT]); + } + + /* We can end up with no piecewise value if we didn't have either + * a SENDME estimate or enough data for an inflight estimate. + * It also happens on the very first sendme, since we need two + * to get a BDP. In these cases, use the cwnd method. */ + if (!cc->bdp[BDP_ALG_PIECEWISE]) { + cc->bdp[BDP_ALG_PIECEWISE] = cc->bdp[BDP_ALG_CWND_RTT]; + log_info(LD_CIRC, "CC: No piecewise BDP. Using %"PRIu64, + cc->bdp[BDP_ALG_PIECEWISE]); + } + + if (cc->next_cc_event == 0) { + if (CIRCUIT_IS_ORIGIN(circ)) { + log_info(LD_CIRC, + "CC: Circuit %d " + "SENDME RTT: %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", " + "BDP estimates: " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64". 
", + CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier, + cc->min_rtt_usec/1000, + curr_rtt_usec/1000, + cc->ewma_rtt_usec/1000, + cc->max_rtt_usec/1000, + cc->bdp[BDP_ALG_INFLIGHT_RTT], + cc->bdp[BDP_ALG_CWND_RTT], + sendme_rate_bdp, + cc->bdp[BDP_ALG_SENDME_RATE], + cc->bdp[BDP_ALG_PIECEWISE] + ); + } else { + log_info(LD_CIRC, + "CC: Circuit %"PRIu64":%d " + "SENDME RTT: %"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64", " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64", " + "%"PRIu64". ", + // XXX: actually, is this p_chan here? This is + // an or_circuit (exit or onion) + circ->n_chan->global_identifier, circ->n_circ_id, + cc->min_rtt_usec/1000, + curr_rtt_usec/1000, + cc->ewma_rtt_usec/1000, + cc->max_rtt_usec/1000, + cc->bdp[BDP_ALG_INFLIGHT_RTT], + cc->bdp[BDP_ALG_CWND_RTT], + sendme_rate_bdp, + cc->bdp[BDP_ALG_SENDME_RATE], + cc->bdp[BDP_ALG_PIECEWISE] + ); + } + } + + /* We updated BDP this round if either we had a blocked channel, or + * the curr_rtt_usec was not 0. */ + bool ret = (blocked_on_chan || curr_rtt_usec != 0); + if (ret) { + tor_trace(TR_SUBSYS(cc), TR_EV(bdp_update), circ, cc, curr_rtt_usec, + sendme_rate_bdp); + } + return ret; +} + +/** + * Dispatch the sendme to the appropriate congestion control algorithm. 
+ */ +int +congestion_control_dispatch_cc_alg(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + int ret = -END_CIRC_REASON_INTERNAL; + switch (cc->cc_alg) { + case CC_ALG_WESTWOOD: + ret = congestion_control_westwood_process_sendme(cc, circ, layer_hint); + break; + + case CC_ALG_VEGAS: + ret = congestion_control_vegas_process_sendme(cc, circ, layer_hint); + break; + + case CC_ALG_NOLA: + ret = congestion_control_nola_process_sendme(cc, circ, layer_hint); + break; + + case CC_ALG_SENDME: + default: + tor_assert(0); + } + + if (cc->cwnd > cwnd_max) { + static ratelim_t cwnd_limit = RATELIM_INIT(60); + log_fn_ratelim(&cwnd_limit, LOG_NOTICE, LD_CIRC, + "Congestion control cwnd %"PRIu64" exceeds max %d, clamping.", + cc->cwnd, cwnd_max); + cc->cwnd = cwnd_max; + } + + return ret; +} diff --git a/src/core/or/congestion_control_common.h b/src/core/or/congestion_control_common.h new file mode 100644 index 0000000000..01dbc1ceb4 --- /dev/null +++ b/src/core/or/congestion_control_common.h @@ -0,0 +1,113 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_common.h + * \brief Public APIs for congestion control + **/ + +#ifndef TOR_CONGESTION_CONTROL_COMMON_H +#define TOR_CONGESTION_CONTROL_COMMON_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" + +typedef struct congestion_control_t congestion_control_t; + +/** Wrapper for the free function, set the CC pointer to NULL after free */ +#define congestion_control_free(cc) \ + FREE_AND_NULL(congestion_control_t, congestion_control_free_, cc) + +void congestion_control_free_(congestion_control_t *cc); + +congestion_control_t *congestion_control_new(void); + +int congestion_control_dispatch_cc_alg(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint); + +void congestion_control_note_cell_sent(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *cpath); + +bool congestion_control_update_circuit_estimates(congestion_control_t *, + const circuit_t *, + const crypt_path_t *); + +int congestion_control_get_package_window(const circuit_t *, + const crypt_path_t *); + +int sendme_get_inc_count(const circuit_t *, const crypt_path_t *); +bool circuit_sent_cell_for_sendme(const circuit_t *, const crypt_path_t *); +bool is_monotime_clock_reliable(void); + +void congestion_control_new_consensus_params(const networkstatus_t *ns); + +/* Ugh, C.. these four are private. Use the getter instead, when + * external to the congestion control code. 
*/ +extern uint32_t or_conn_highwater; +extern uint32_t or_conn_lowwater; +extern int32_t cell_queue_high; +extern int32_t cell_queue_low; + +/** Stop writing on an orconn when its outbuf is this large */ +static inline uint32_t +or_conn_highwatermark(void) +{ + return or_conn_highwater; +} + +/** Resume writing on an orconn when its outbuf is less than this */ +static inline uint32_t +or_conn_lowwatermark(void) +{ + return or_conn_lowwater; +} + +/** Stop reading on edge connections when we have this many cells + * waiting on the appropriate queue. */ +static inline int32_t +cell_queue_highwatermark(void) +{ + return cell_queue_high; +} + +/** Start reading from edge connections again when we get down to this many + * cells. */ +static inline int32_t +cell_queue_lowwatermark(void) +{ + return cell_queue_low; +} + +/** + * Compute an N-count EWMA, aka N-EWMA. N-EWMA is defined as: + * EWMA = alpha*value + (1-alpha)*EWMA_prev + * with alpha = 2/(N+1). + * + * This works out to: + * EWMA = value*2/(N+1) + EMA_prev*(N-1)/(N+1) + * = (value*2 + EWMA_prev*(N-1))/(N+1) + */ +static inline uint64_t +n_count_ewma(uint64_t curr, uint64_t prev, uint64_t N) +{ + if (prev == 0) + return curr; + else + return (2*curr + (N-1)*prev)/(N+1); +} + +/* Private section starts. */ +#ifdef TOR_CONGESTION_CONTROL_PRIVATE + +/* + * Unit tests declaractions. + */ +#ifdef TOR_UNIT_TESTS + +#endif /* defined(TOR_UNIT_TESTS) */ + +#endif /* defined(TOR_CONGESTION_CONTROL_PRIVATE) */ + +#endif /* !defined(TOR_CONGESTION_CONTROL_COMMON_H) */ diff --git a/src/core/or/congestion_control_flow.c b/src/core/or/congestion_control_flow.c new file mode 100644 index 0000000000..805654664c --- /dev/null +++ b/src/core/or/congestion_control_flow.c @@ -0,0 +1,710 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_flow.c + * \brief Code that implements flow control for congestion controlled + * circuits. 
+ */ + +#define TOR_CONGESTION_CONTROL_FLOW_PRIVATE + +#include "core/or/or.h" + +#include "core/or/relay.h" +#include "core/mainloop/connection.h" +#include "core/or/connection_edge.h" +#include "core/mainloop/mainloop.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_flow.h" +#include "core/or/congestion_control_st.h" +#include "core/or/circuitlist.h" +#include "core/or/trace_probes_cc.h" +#include "feature/nodelist/networkstatus.h" +#include "trunnel/flow_control_cells.h" + +#include "core/or/connection_st.h" +#include "core/or/cell_st.h" +#include "app/config/config.h" + +/** Cache consensus parameters */ +static uint32_t xoff_client; +static uint32_t xoff_exit; + +static uint32_t xon_change_pct; +static uint32_t xon_ewma_cnt; +static uint32_t xon_rate_bytes; + +/* In normal operation, we can get a burst of up to 32 cells before returning + * to libevent to flush the outbuf. This is a heuristic from hardcoded values + * and strange logic in connection_bucket_get_share(). */ +#define MAX_EXPECTED_CELL_BURST 32 + +/* The following three are for dropmark rate limiting. They define when we + * scale down our XON, XOFF, and xmit byte counts. Early scaling is beneficial + * because it limits the ability of spurious XON/XOFF to be sent after large + * amounts of data without XON/XOFF. At these limits, after 10MB of data (or + * more), an adversary can only inject (log2(10MB)-log2(200*500))*100 ~= 1000 + * cells of fake XOFF/XON before the xmit byte count will be halved enough to + * triggering a limit. */ +#define XON_COUNT_SCALE_AT 200 +#define XOFF_COUNT_SCALE_AT 200 +#define ONE_MEGABYTE (UINT64_C(1) << 20) +#define TOTAL_XMIT_SCALE_AT (10 * ONE_MEGABYTE) + +/** + * Return the congestion control object of the given edge connection. + * + * Returns NULL if the edge connection doesn't have a cpath_layer or not + * attached to a circuit. But also if the cpath_layer or circuit doesn't have a + * congestion control object. 
+ */ +static inline const congestion_control_t * +edge_get_ccontrol(const edge_connection_t *edge) +{ + if (edge->cpath_layer) + return edge->cpath_layer->ccontrol; + else if (edge->on_circuit) + return edge->on_circuit->ccontrol; + else + return NULL; +} + +/** + * Update global congestion control related consensus parameter values, every + * consensus update. + * + * More details for each of the parameters can be found in proposal 324, + * section 6.5 including tuning notes. + */ +void +flow_control_new_consensus_params(const networkstatus_t *ns) +{ +#define CC_XOFF_CLIENT_DFLT 500 +#define CC_XOFF_CLIENT_MIN 1 +#define CC_XOFF_CLIENT_MAX 10000 + xoff_client = networkstatus_get_param(ns, "cc_xoff_client", + CC_XOFF_CLIENT_DFLT, + CC_XOFF_CLIENT_MIN, + CC_XOFF_CLIENT_MAX)*RELAY_PAYLOAD_SIZE; + +#define CC_XOFF_EXIT_DFLT 500 +#define CC_XOFF_EXIT_MIN 1 +#define CC_XOFF_EXIT_MAX 10000 + xoff_exit = networkstatus_get_param(ns, "cc_xoff_exit", + CC_XOFF_EXIT_DFLT, + CC_XOFF_EXIT_MIN, + CC_XOFF_EXIT_MAX)*RELAY_PAYLOAD_SIZE; + +#define CC_XON_CHANGE_PCT_DFLT 25 +#define CC_XON_CHANGE_PCT_MIN 1 +#define CC_XON_CHANGE_PCT_MAX 99 + xon_change_pct = networkstatus_get_param(ns, "cc_xon_change_pct", + CC_XON_CHANGE_PCT_DFLT, + CC_XON_CHANGE_PCT_MIN, + CC_XON_CHANGE_PCT_MAX); + +#define CC_XON_RATE_BYTES_DFLT (500) +#define CC_XON_RATE_BYTES_MIN (1) +#define CC_XON_RATE_BYTES_MAX (5000) + xon_rate_bytes = networkstatus_get_param(ns, "cc_xon_rate", + CC_XON_RATE_BYTES_DFLT, + CC_XON_RATE_BYTES_MIN, + CC_XON_RATE_BYTES_MAX)*RELAY_PAYLOAD_SIZE; + +#define CC_XON_EWMA_CNT_DFLT (2) +#define CC_XON_EWMA_CNT_MIN (1) +#define CC_XON_EWMA_CNT_MAX (100) + xon_ewma_cnt = networkstatus_get_param(ns, "cc_xon_ewma_cnt", + CC_XON_EWMA_CNT_DFLT, + CC_XON_EWMA_CNT_MIN, + CC_XON_EWMA_CNT_MAX); +} + +/** + * Send an XOFF for this stream, and note that we sent one + */ +static void +circuit_send_stream_xoff(edge_connection_t *stream) +{ + xoff_cell_t xoff; + uint8_t payload[CELL_PAYLOAD_SIZE]; + 
ssize_t xoff_size; + + memset(&xoff, 0, sizeof(xoff)); + memset(payload, 0, sizeof(payload)); + + xoff_cell_set_version(&xoff, 0); + + if ((xoff_size = xoff_cell_encode(payload, CELL_PAYLOAD_SIZE, &xoff)) < 0) { + log_warn(LD_BUG, "Failed to encode xoff cell"); + return; + } + + if (connection_edge_send_command(stream, RELAY_COMMAND_XOFF, + (char*)payload, (size_t)xoff_size) == 0) { + stream->xoff_sent = true; + } +} + +/** + * Compute the recent drain rate (write rate) for this edge + * connection and return it, in KB/sec (1000 bytes/sec). + * + * Returns 0 if the monotime clock is busted. + */ +static inline uint32_t +compute_drain_rate(const edge_connection_t *stream) +{ + if (BUG(!is_monotime_clock_reliable())) { + log_warn(LD_BUG, "Computing drain rate with stalled monotime clock"); + return 0; + } + + uint64_t delta = monotime_absolute_usec() - stream->drain_start_usec; + + if (delta == 0) { + log_warn(LD_BUG, "Computing stream drain rate with zero time delta"); + return 0; + } + + /* Overflow checks */ + if (stream->prev_drained_bytes > INT32_MAX/1000 || /* Intermediate */ + stream->prev_drained_bytes/delta > INT32_MAX/1000) { /* full value */ + return INT32_MAX; + } + + /* kb/sec = bytes/usec * 1000 usec/msec * 1000 msec/sec * kb/1000bytes */ + return MAX(1, (uint32_t)(stream->prev_drained_bytes * 1000)/delta); +} + +/** + * Send an XON for this stream, with appropriate advisory rate information. + * + * Reverts the xoff sent status, and stores the rate information we sent, + * in case it changes.
+ */ +static void +circuit_send_stream_xon(edge_connection_t *stream) +{ + xon_cell_t xon; + uint8_t payload[CELL_PAYLOAD_SIZE]; + ssize_t xon_size; + + memset(&xon, 0, sizeof(xon)); + memset(payload, 0, sizeof(payload)); + + xon_cell_set_version(&xon, 0); + xon_cell_set_kbps_ewma(&xon, stream->ewma_drain_rate); + + if ((xon_size = xon_cell_encode(payload, CELL_PAYLOAD_SIZE, &xon)) < 0) { + log_warn(LD_BUG, "Failed to encode xon cell"); + return; + } + + /* Store the advisory rate information, to send advisory updates if + * it changes */ + stream->ewma_rate_last_sent = stream->ewma_drain_rate; + + if (connection_edge_send_command(stream, RELAY_COMMAND_XON, (char*)payload, + (size_t)xon_size) == 0) { + /* Revert the xoff sent status, so we can send another one if need be */ + stream->xoff_sent = false; + } +} + +/** + * Process a stream XOFF, parsing it, and then stopping reading on + * the edge connection. + * + * Record that we have received an xoff, so we know not to resume + * reading on this edge conn until we get an XON. + * + * Returns false if the XOFF did not validate; true if it does.
+ */ +bool +circuit_process_stream_xoff(edge_connection_t *conn, + const crypt_path_t *layer_hint, + const cell_t *cell) +{ + (void)cell; + bool retval = true; + + if (BUG(!conn)) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XOFF on invalid stream?"); + return false; + } + + /* Make sure this XOFF came from the right hop */ + if (layer_hint && layer_hint != conn->cpath_layer) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XOFF from wrong hop."); + return false; + } + + if (edge_get_ccontrol(conn) == NULL) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XOFF for non-congestion control circuit"); + return false; + } + + if (conn->xoff_received) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got multiple XOFF on connection"); + return false; + } + + /* If we are near the max, scale everything down */ + if (conn->num_xoff_recv == XOFF_COUNT_SCALE_AT) { + log_info(LD_EDGE, "Scaling down for XOFF count: %d %d %d", + conn->total_bytes_xmit, + conn->num_xoff_recv, + conn->num_xon_recv); + conn->total_bytes_xmit /= 2; + conn->num_xoff_recv /= 2; + conn->num_xon_recv /= 2; + } + + conn->num_xoff_recv++; + + /* Client-side check to make sure that XOFF is not sent too early, + * for dropmark attacks. The main sidechannel risk is early cells, + * but we also check to make sure that we have not received more XOFFs + * than could have been generated by the bytes we sent. + */ + if (TO_CONN(conn)->type == CONN_TYPE_AP || conn->hs_ident != NULL) { + uint32_t limit = 0; + + /* TODO: This limit technically needs to come from negotiation, + * and be bounds checked for sanity, because the other endpoint + * may have a different consensus */ + if (conn->hs_ident) + limit = xoff_client; + else + limit = xoff_exit; + + if (conn->total_bytes_xmit < limit*conn->num_xoff_recv) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got extra XOFF for bytes sent. 
Got %d, expected max %d", + conn->num_xoff_recv, conn->total_bytes_xmit/limit); + /* We still process this, because the only dropmark defenses + * in C tor are via the vanguards addon's use of the read valid + * cells. So just signal that we think this is not valid protocol + * data and proceed. */ + retval = false; + } + } + + // TODO: Count how many xoffs we have; log if "too many", for shadow + // analysis of chatter. Possibly add to extra-info? + + log_info(LD_EDGE, "Got XOFF!"); + connection_stop_reading(TO_CONN(conn)); + conn->xoff_received = true; + + return retval; +} + +/** + * Process a stream XON, and if it validates, clear the xoff + * flag and resume reading on this edge connection. + * + * Also, use provided rate information to rate limit + * reading on this edge (or packagaing from it onto + * the circuit), to avoid XON/XOFF chatter. + * + * Returns true if the XON validates, false otherwise. + */ +bool +circuit_process_stream_xon(edge_connection_t *conn, + const crypt_path_t *layer_hint, + const cell_t *cell) +{ + xon_cell_t *xon; + bool retval = true; + + if (BUG(!conn)) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XON on invalid stream?"); + return false; + } + + /* Make sure this XON came from the right hop */ + if (layer_hint && layer_hint != conn->cpath_layer) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XON from wrong hop."); + return false; + } + + if (edge_get_ccontrol(conn) == NULL) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got XON for non-congestion control circuit"); + return false; + } + + if (xon_cell_parse(&xon, cell->payload+RELAY_HEADER_SIZE, + CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Received malformed XON cell."); + return false; + } + + /* If we are near the max, scale everything down */ + if (conn->num_xon_recv == XON_COUNT_SCALE_AT) { + log_info(LD_EDGE, "Scaling down for XON count: %d %d %d", + conn->total_bytes_xmit, + conn->num_xoff_recv, + conn->num_xon_recv); + 
conn->total_bytes_xmit /= 2; + conn->num_xoff_recv /= 2; + conn->num_xon_recv /= 2; + } + + conn->num_xon_recv++; + + /* Client-side check to make sure that XON is not sent too early, + * for dropmark attacks. The main sidechannel risk is early cells, + * but we also check to see that we did not get more XONs than make + * sense for the number of bytes we sent. + */ + if (TO_CONN(conn)->type == CONN_TYPE_AP || conn->hs_ident != NULL) { + uint32_t limit = 0; + + /* TODO: This limit technically needs to come from negotiation, + * and be bounds checked for sanity, because the other endpoint + * may have a different consensus */ + if (conn->hs_ident) + limit = MIN(xoff_client, xon_rate_bytes); + else + limit = MIN(xoff_exit, xon_rate_bytes); + + if (conn->total_bytes_xmit < limit*conn->num_xon_recv) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Got extra XON for bytes sent. Got %d, expected max %d", + conn->num_xon_recv, conn->total_bytes_xmit/limit); + + /* We still process this, because the only dropmark defenses + * in C tor are via the vanguards addon's use of the read valid + * cells. So just signal that we think this is not valid protocol + * data and proceed. */ + retval = false; + } + } + + log_info(LD_EDGE, "Got XON: %d", xon->kbps_ewma); + + /* Adjust the token bucket of this edge connection with the drain rate in + * the XON. Rate is in bytes from kilobit (kpbs). */ + uint64_t rate = ((uint64_t) xon_cell_get_kbps_ewma(xon) * 1000); + if (rate == 0 || INT32_MAX < rate) { + /* No rate. 
*/ + rate = INT32_MAX; + } + token_bucket_rw_adjust(&conn->bucket, (uint32_t) rate, (uint32_t) rate); + + if (conn->xoff_received) { + /* Clear the fact that we got an XOFF, so that this edge can + * start and stop reading normally */ + conn->xoff_received = false; + connection_start_reading(TO_CONN(conn)); + } + + xon_cell_free(xon); + + return retval; +} + +/** + * Called from sendme_stream_data_received(), when data arrives + * from a circuit to our edge's outbuf, to decide if we need to send + * an XOFF. + * + * Returns the amount of cells remaining until the buffer is full, at + * which point it sends an XOFF, and returns 0. + * + * Returns less than 0 if we have queued more than a congestion window + * worth of data and need to close the circuit. + */ +int +flow_control_decide_xoff(edge_connection_t *stream) +{ + size_t total_buffered = connection_get_outbuf_len(TO_CONN(stream)); + uint32_t buffer_limit_xoff = 0; + + if (BUG(edge_get_ccontrol(stream) == NULL)) { + log_err(LD_BUG, "Flow control called for non-congestion control circuit"); + return -1; + } + + /* Onion services and clients are typically localhost edges, so they + * need different buffering limits than exits do */ + if (TO_CONN(stream)->type == CONN_TYPE_AP || stream->hs_ident != NULL) { + buffer_limit_xoff = xoff_client; + } else { + buffer_limit_xoff = xoff_exit; + } + + if (total_buffered > buffer_limit_xoff) { + if (!stream->xoff_sent) { + log_info(LD_EDGE, "Sending XOFF: %"TOR_PRIuSZ" %d", + total_buffered, buffer_limit_xoff); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xoff_sending), stream); + + circuit_send_stream_xoff(stream); + + /* Clear the drain rate. It is considered wrong if we + * got all the way to XOFF */ + stream->ewma_drain_rate = 0; + } + } + + /* If the outbuf has accumulated more than the expected burst limit of + * cells, then assume it is not draining, and call decide_xon. 
We must + * do this because writes only happen when the socket unblocks, so + * may not otherwise notice accumulation of data in the outbuf for + * advisory XONs. */ + if (total_buffered > MAX_EXPECTED_CELL_BURST*RELAY_PAYLOAD_SIZE) { + flow_control_decide_xon(stream, 0); + } + + /* Flow control always takes more data; we rely on the oomkiller to + * handle misbehavior. */ + return 0; +} + +/** + * Returns true if the stream's drain rate has changed significantly. + * + * Returns false if the monotime clock is stalled, or if we have + * no previous drain rate information. + */ +static bool +stream_drain_rate_changed(const edge_connection_t *stream) +{ + if (!is_monotime_clock_reliable()) { + return false; + } + + if (!stream->ewma_rate_last_sent) { + return false; + } + + if (stream->ewma_drain_rate > + (100+(uint64_t)xon_change_pct)*stream->ewma_rate_last_sent/100) { + return true; + } + + if (stream->ewma_drain_rate < + (100-(uint64_t)xon_change_pct)*stream->ewma_rate_last_sent/100) { + return true; + } + + return false; +} + +/** + * Called whenever we drain an edge connection outbuf by writing on + * its socket, to decide if it is time to send an xon. + * + * The n_written parameter tells us how many bytes we have written + * this time, which is used to compute the advisory drain rate fields. + */ +void +flow_control_decide_xon(edge_connection_t *stream, size_t n_written) +{ + size_t total_buffered = connection_get_outbuf_len(TO_CONN(stream)); + + /* Bounds check the number of drained bytes, and scale */ + if (stream->drained_bytes >= UINT32_MAX - n_written) { + /* Cut the bytes in half, and move the start time up halfway to now + * (if we have one). 
*/ + stream->drained_bytes /= 2; + + if (stream->drain_start_usec) { + uint64_t now = monotime_absolute_usec(); + + stream->drain_start_usec = now - (now-stream->drain_start_usec)/2; + } + } + + /* Accumulate drained bytes since last rate computation */ + stream->drained_bytes += n_written; + + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon), stream, n_written); + + /* Check for bad monotime clock and bytecount wrap */ + if (!is_monotime_clock_reliable()) { + /* If the monotime clock ever goes wrong, the safest thing to do + * is just clear our short-term rate info and wait for the clock to + * become reliable again.. */ + stream->drain_start_usec = 0; + stream->drained_bytes = 0; + } else { + /* If we have no drain start timestamp, and we still have + * remaining buffer, start the buffering counter */ + if (!stream->drain_start_usec && total_buffered > 0) { + log_debug(LD_EDGE, "Began edge buffering: %d %d %"TOR_PRIuSZ, + stream->ewma_rate_last_sent, + stream->ewma_drain_rate, + total_buffered); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_drain_start), + stream); + stream->drain_start_usec = monotime_absolute_usec(); + stream->drained_bytes = 0; + } + } + + if (stream->drain_start_usec) { + /* If we have spent enough time in a queued state, update our drain + * rate. */ + if (stream->drained_bytes > xon_rate_bytes) { + /* No previous drained bytes means it is the first time we are computing + * it so use the value we just drained onto the socket as a baseline. It + * won't be accurate but it will be a start towards the right value. + * + * We have to do this in order to have a drain rate else we could be + * sending a drain rate of 0 in an XON which would be undesirable and + * basically like sending an XOFF. 
*/ + if (stream->prev_drained_bytes == 0) { + stream->prev_drained_bytes = stream->drained_bytes; + } + uint32_t drain_rate = compute_drain_rate(stream); + /* Once the drain rate has been computed, note how many bytes we just + * drained so it can be used at the next calculation. We do this here + * because it gets reset once the rate is changed. */ + stream->prev_drained_bytes = stream->drained_bytes; + + if (drain_rate) { + stream->ewma_drain_rate = + (uint32_t)n_count_ewma(drain_rate, + stream->ewma_drain_rate, + xon_ewma_cnt); + log_debug(LD_EDGE, "Updating drain rate: %d %d %"TOR_PRIuSZ, + drain_rate, + stream->ewma_drain_rate, + total_buffered); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_drain_update), + stream, drain_rate); + /* Reset recent byte counts. This prevents us from sending advisory + * XONs more frequent than every xon_rate_bytes. */ + stream->drained_bytes = 0; + stream->drain_start_usec = 0; + } + } + } + + /* If we don't have an XOFF outstanding, consider updating an + * old rate */ + if (!stream->xoff_sent) { + if (stream_drain_rate_changed(stream)) { + /* If we are still buffering and the rate changed, update + * advisory XON */ + log_info(LD_EDGE, "Sending rate-change XON: %d %d %"TOR_PRIuSZ, + stream->ewma_rate_last_sent, + stream->ewma_drain_rate, + total_buffered); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_rate_change), stream); + circuit_send_stream_xon(stream); + } + } else if (total_buffered == 0) { + log_info(LD_EDGE, "Sending XON: %d %d %"TOR_PRIuSZ, + stream->ewma_rate_last_sent, + stream->ewma_drain_rate, + total_buffered); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_partial_drain), stream); + circuit_send_stream_xon(stream); + } + + /* If the buffer has fully emptied, clear the drain timestamp, + * so we can total only bytes drained while outbuf is 0. */ + if (total_buffered == 0) { + stream->drain_start_usec = 0; + + /* After we've spent 'xon_rate_bytes' with the queue fully drained, + * double any rate we sent. 
*/ + if (stream->drained_bytes >= xon_rate_bytes && + stream->ewma_rate_last_sent) { + stream->ewma_drain_rate = MIN(INT32_MAX, 2*stream->ewma_drain_rate); + + log_debug(LD_EDGE, + "Queue empty for xon_rate_limit bytes: %d %d", + stream->ewma_rate_last_sent, + stream->ewma_drain_rate); + tor_trace(TR_SUBSYS(cc), TR_EV(flow_decide_xon_drain_doubled), stream); + /* Resetting the drained bytes count. We need to keep its value as a + * previous so the drain rate calculation takes into account what was + * actually drain the last time. */ + stream->prev_drained_bytes = stream->drained_bytes; + stream->drained_bytes = 0; + } + } + + return; +} + +/** + * Note that we packaged some data on this stream. Used to enforce + * client-side dropmark limits + */ +void +flow_control_note_sent_data(edge_connection_t *stream, size_t len) +{ + /* If we are near the max, scale everything down */ + if (stream->total_bytes_xmit >= TOTAL_XMIT_SCALE_AT-len) { + log_info(LD_EDGE, "Scaling down for flow control xmit bytes:: %d %d %d", + stream->total_bytes_xmit, + stream->num_xoff_recv, + stream->num_xon_recv); + + stream->total_bytes_xmit /= 2; + stream->num_xoff_recv /= 2; + stream->num_xon_recv /= 2; + } + + stream->total_bytes_xmit += len; +} + +/** Returns true if an edge connection uses flow control */ +bool +edge_uses_flow_control(const edge_connection_t *stream) +{ + bool ret = (stream->on_circuit && stream->on_circuit->ccontrol) || + (stream->cpath_layer && stream->cpath_layer->ccontrol); + + /* All circuits with congestion control use flow control */ + return ret; +} + +/** + * Returns the max RTT for the circuit that carries this stream, + * as observed by congestion control. 
+ */ +uint64_t +edge_get_max_rtt(const edge_connection_t *stream) +{ + if (stream->on_circuit && stream->on_circuit->ccontrol) + return stream->on_circuit->ccontrol->max_rtt_usec; + else if (stream->cpath_layer && stream->cpath_layer->ccontrol) + return stream->cpath_layer->ccontrol->max_rtt_usec; + + return 0; +} + +/** Returns true if a connection is an edge conn that uses flow control */ +bool +conn_uses_flow_control(connection_t *conn) +{ + bool ret = false; + + if (CONN_IS_EDGE(conn)) { + edge_connection_t *edge = TO_EDGE_CONN(conn); + + if (edge_uses_flow_control(edge)) { + ret = true; + } + } + + return ret; +} + diff --git a/src/core/or/congestion_control_flow.h b/src/core/or/congestion_control_flow.h new file mode 100644 index 0000000000..6c318027ea --- /dev/null +++ b/src/core/or/congestion_control_flow.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_flow.h + * \brief APIs for stream flow control on congestion controlled circuits. + **/ + +#ifndef TOR_CONGESTION_CONTROL_FLOW_H +#define TOR_CONGESTION_CONTROL_FLOW_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" +#include "core/or/edge_connection_st.h" + +void flow_control_new_consensus_params(const struct networkstatus_t *); + +bool circuit_process_stream_xoff(edge_connection_t *conn, + const crypt_path_t *layer_hint, + const cell_t *cell); +bool circuit_process_stream_xon(edge_connection_t *conn, + const crypt_path_t *layer_hint, + const cell_t *cell); + +int flow_control_decide_xoff(edge_connection_t *stream); +void flow_control_decide_xon(edge_connection_t *stream, size_t n_written); + +void flow_control_note_sent_data(edge_connection_t *stream, size_t len); + +bool edge_uses_flow_control(const edge_connection_t *stream); + +bool conn_uses_flow_control(connection_t *stream); + +uint64_t edge_get_max_rtt(const edge_connection_t *); + +/* Private section starts. 
*/ +#ifdef TOR_CONGESTION_CONTROL_FLOW_PRIVATE + +/* + * Unit tests declaractions. + */ +#ifdef TOR_UNIT_TESTS + +#endif /* defined(TOR_UNIT_TESTS) */ + +#endif /* defined(TOR_CONGESTION_CONTROL_FLOW_PRIVATE) */ + +#endif /* !defined(TOR_CONGESTION_CONTROL_FLOW_H) */ diff --git a/src/core/or/congestion_control_nola.c b/src/core/or/congestion_control_nola.c new file mode 100644 index 0000000000..09f88d4699 --- /dev/null +++ b/src/core/or/congestion_control_nola.c @@ -0,0 +1,126 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_nola.c + * \brief Code that implements the TOR_NOLA congestion control algorithm + * from Proposal #324. + */ + +#define TOR_CONGESTION_CONTROL_NOLA_PRIVATE + +#include "core/or/or.h" + +#include "core/or/crypt_path.h" +#include "core/or/or_circuit_st.h" +#include "core/or/sendme.h" +#include "core/or/congestion_control_st.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_nola.h" +#include "core/or/circuituse.h" +#include "core/or/circuitlist.h" +#include "core/or/origin_circuit_st.h" +#include "core/or/channel.h" +#include "feature/nodelist/networkstatus.h" + +#define NOLA_BDP_OVERSHOOT 100 + +/** + * Cache NOLA consensus parameters. + */ +void +congestion_control_nola_set_params(congestion_control_t *cc) +{ + tor_assert(cc->cc_alg == CC_ALG_NOLA); + + cc->nola_params.bdp_overshoot = + networkstatus_get_param(NULL, "cc_nola_overshoot", + NOLA_BDP_OVERSHOOT, + 0, + 1000); +} + +/** +* Process a SENDME and update the congestion window according to the +* rules specified in TOR_NOLA of Proposal #324. +* +* TOR_NOLA updates the congestion window to match the current +* BDP estimate, every sendme. Because this can result in downward +* drift, a fixed overhead is added to the BDP estimate. This will +* cause some queuing, but ensures that the algorithm always uses +* the full BDP. 
+* +* To handle the case where the local orconn blocks, TOR_NOLA uses +* the 'piecewise' BDP estimate, which uses a more conservative BDP +* estimate method when blocking occurs, but a more aggressive BDP +* estimate when there is no local blocking. This minimizes local +* client queues. +*/ +int +congestion_control_nola_process_sendme(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + tor_assert(cc && cc->cc_alg == CC_ALG_NOLA); + tor_assert(circ); + + if (cc->next_cc_event) + cc->next_cc_event--; + + /* If we get a congestion event, the only thing NOLA + * does is note this as if we exited slow-start + * (which for NOLA just means we finished our ICW). */ + if (cc->next_cc_event == 0) + cc->in_slow_start = 0; + + /* If we did not successfully update BDP, we must return. Otherwise, + * NOLA can drift downwards */ + if (!congestion_control_update_circuit_estimates(cc, circ, layer_hint)) { + cc->inflight = cc->inflight - cc->sendme_inc; + return 0; + } + + /* We overshoot the BDP by the cwnd_inc param amount, because BDP + * may otherwise drift down. This helps us probe for more capacity. + * But there is no sense to do it if the local channel is blocked.
*/ + if (cc->blocked_chan) + cc->cwnd = cc->bdp[cc->bdp_alg]; + else + cc->cwnd = cc->bdp[cc->bdp_alg] + cc->nola_params.bdp_overshoot; + + /* cwnd can never fall below 1 increment */ + cc->cwnd = MAX(cc->cwnd, cc->cwnd_min); + + if (CIRCUIT_IS_ORIGIN(circ)) { + log_info(LD_CIRC, + "CC TOR_NOLA: Circuit %d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "NCCE: %"PRIu64", " + "SS: %d", + CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier, + cc->cwnd, + cc->inflight, + cc->next_cc_event, + cc->in_slow_start + ); + } else { + log_info(LD_CIRC, + "CC TOR_NOLA: Circuit %"PRIu64":%d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "NCCE: %"PRIu64", " + "SS: %d", + circ->n_chan->global_identifier, circ->n_circ_id, + cc->cwnd, + cc->inflight, + cc->next_cc_event, + cc->in_slow_start + ); + } + + /* Update inflight with ack */ + cc->inflight = cc->inflight - cc->sendme_inc; + + return 0; +} diff --git a/src/core/or/congestion_control_nola.h b/src/core/or/congestion_control_nola.h new file mode 100644 index 0000000000..9c7d6e0635 --- /dev/null +++ b/src/core/or/congestion_control_nola.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_nola.h + * \brief Private-ish APIs for the TOR_NOLA congestion control algorithm + **/ + +#ifndef TOR_CONGESTION_CONTROL_NOLA_H +#define TOR_CONGESTION_CONTROL_NOLA_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" + +/* Processing SENDME cell. */ +int congestion_control_nola_process_sendme(struct congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint); +void congestion_control_nola_set_params(struct congestion_control_t *cc); + +/* Private section starts. */ +#ifdef TOR_CONGESTION_CONTROL_NOLA_PRIVATE + +/* + * Unit tests declaractions. 
+ */ +#ifdef TOR_UNIT_TESTS + +#endif /* defined(TOR_UNIT_TESTS) */ + +#endif /* defined(TOR_CONGESTION_CONTROL_NOLA_PRIVATE) */ + +#endif /* !defined(TOR_CONGESTION_CONTROL_NOLA_H) */ diff --git a/src/core/or/congestion_control_st.h b/src/core/or/congestion_control_st.h new file mode 100644 index 0000000000..251ebd82e3 --- /dev/null +++ b/src/core/or/congestion_control_st.h @@ -0,0 +1,257 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_st.h + * \brief Structure definitions for congestion control. + **/ + +#ifndef CONGESTION_CONTROL_ST_H +#define CONGESTION_CONTROL_ST_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" + +/** Signifies which sendme algorithm to use */ +typedef enum { + /** OG Tor fixed-sized circ and stream windows. It sucks, but it is important + * to make sure that the new algs can compete with the old garbage. */ + CC_ALG_SENDME = 0, + + /** + * Prop#324 TOR_WESTWOOD - Deliberately aggressive. Westwood may not even + * converge to fairness in some cases because max RTT will also increase + * on congestion, which boosts the Westwood RTT congestion threshold. So it + * can cause runaway queue bloat, which may or may not lead to a robot + * uprising... Ok that's Westworld, not Westwood. Still, we need to test + * Vegas and NOLA against something more aggressive to ensure they do not + * starve in the presence of cheaters. We also need to make sure cheaters + * trigger the oomkiller in those cases. + */ + CC_ALG_WESTWOOD = 1, + + /** + * Prop#324 TOR_VEGAS - TCP Vegas-style BDP tracker. Because Vegas backs off + * whenever it detects queue delay, it can be beaten out by more aggressive + * algs. However, in live network testing, it seems to do just fine against + * current SENDMEs. It outperforms Westwood and does not stall.
*/ + CC_ALG_VEGAS = 2, + + /** + * Prop#324: TOR_NOLA - NOLA looks the BDP right in the eye and uses it + * immediately as CWND. No slow start, no other congestion signals, no delay, + * no bullshit. Like TOR_VEGAS, it also uses agressive BDP estimates, to + * avoid out-competition. It seems a bit better throughput than Vegas, + * but its agressive BDP and rapid updates may lead to more queue latency. */ + CC_ALG_NOLA = 3, +} cc_alg_t; + +/* Total number of CC algs in cc_alg_t enum */ +#define NUM_CC_ALGS (CC_ALG_NOLA+1) + +/** Signifies how we estimate circuit BDP */ +typedef enum { + /* CWND-based BDP will respond to changes in RTT only, and is relative + * to cwnd growth. So in slow-start, this will under-estimate BDP */ + BDP_ALG_CWND_RTT = 0, + + /* Sendme-based BDP will quickly measure BDP in less than + * a cwnd worth of data when in use. So it should be good for slow-start. + * But if the link goes idle, it will be vastly lower than true BDP. Thus, + * this estimate gets reset when the cwnd is not fully utilized. */ + BDP_ALG_SENDME_RATE = 1, + + /* Inflight BDP is similar to the cwnd estimator, except it uses + * packets inflight minus local circuit queues instead of current cwnd. + * Because it is strictly less than or equal to the cwnd, it will cause + * the cwnd to drift downward. It is only used if the local OR connection + * is blocked. */ + BDP_ALG_INFLIGHT_RTT = 2, + + /* The Piecewise BDP estimator uses the CWND estimator before there + * are sufficient SENDMEs to calculate the SENDME estimator. At that + * point, it uses the SENDME estimator, unless the local OR connection + * becomes blocked. In that case, it switches to the inflight estimator. 
*/ + BDP_ALG_PIECEWISE = 3, + +} bdp_alg_t; + +/** Total number of BDP algs in bdp_alg_t enum */ +#define NUM_BDP_ALGS (BDP_ALG_PIECEWISE+1) + +/** Westwood algorithm parameters */ +struct westwood_params_t { + /** Cwnd backoff multiplier upon congestion (as percent) */ + uint8_t cwnd_backoff_m; + /** Max RTT backoff multiplier upon congestion (as percent) */ + uint8_t rtt_backoff_m; + + /** Threshold between min and max RTT, to signal congestion (percent) */ + uint8_t rtt_thresh; + + /** + * If true, use minimum of BDP and backoff multiplication in backoff. + * If false, use maximum of BDP and backoff multiplication in backoff. */ + bool min_backoff; +}; + +/** Vegas algorithm parameters. */ +struct vegas_params_t { + /** The queue use allowed before we exit slow start */ + uint16_t gamma; + /** The queue use below which we increment cwnd */ + uint16_t alpha; + /** The queue use above which we decrement cwnd */ + uint16_t beta; + /** Weighted average (percent) between cwnd estimator and + * piecewise estimator. */ + uint8_t bdp_mix_pct; +}; + +/** NOLA consensus params */ +struct nola_params_t { + /** How many cells to add to BDP estimate to obtain cwnd */ + uint16_t bdp_overshoot; +}; + +/** Fields common to all congestion control algorithms */ +typedef struct congestion_control_t { + /** + * Smartlist of uint64_t monotime usec timestamps of when we sent a data + * cell that is pending a sendme. FIFO queue that is managed similar to + * sendme_last_digests. */ + smartlist_t *sendme_pending_timestamps; + + /** + * Smartlist of uint64_t monotime timestamp of when sendme's arrived. + * FIFO queue that is managed similar to sendme_last_digests. + * Used to estimate circuitbandwidth and BDP. */ + smartlist_t *sendme_arrival_timestamps; + + /** RTT time data for congestion control. 
*/ + uint64_t ewma_rtt_usec; + uint64_t min_rtt_usec; + uint64_t max_rtt_usec; + + /* BDP estimates by algorithm */ + uint64_t bdp[NUM_BDP_ALGS]; + + /** Congestion window */ + uint64_t cwnd; + + /** Number of cells in-flight (sent but awaiting SENDME ack). */ + uint64_t inflight; + + /** + * For steady-state: the number of sendme acks until we will acknowledge + * a congestion event again. It starts out as the number of sendme acks + * in a congestion window and is decremented each ack. When this reaches + * 0, it means we should examine our congestion algorithm conditions. + * In this way, we only react to one congestion event per congestion window. + * + * It is also reset to 0 immediately whenever the circuit's orconn is + * blocked, and when a previously blocked orconn is unblocked. + */ + uint64_t next_cc_event; + + /** Are we in slow start? */ + bool in_slow_start; + + /** Is the local channel blocked on us? That's a congestion signal */ + bool blocked_chan; + + /* The following parameters are cached from consensus values upon + * circuit setup. */ + + /** Percent of cwnd to increment by during slow start */ + uint16_t cwnd_inc_pct_ss; + + /** Number of cells to increment cwnd by during steady state */ + uint16_t cwnd_inc; + + /** Minimum congestion window (must be at least sendme_inc) */ + uint16_t cwnd_min; + + /** + * Number of times per congestion window to update based on congestion + * signals */ + uint8_t cwnd_inc_rate; + + /** + * Number of cwnd worth of sendme acks to smooth RTT and BDP with, + * using N_EWMA */ + uint8_t ewma_cwnd_cnt; + + /** + * Minimum number of sendmes before we begin BDP estimates + */ + uint8_t bwe_sendme_min; + + /** + * Number of cells to ack with every sendme. Taken from consensus parameter + * and negotiation during circuit setup. */ + uint8_t sendme_inc; + + /** Which congestion control algorithm to use. Taken from + * consensus parameter and negotiation during circuit setup. 
*/ + cc_alg_t cc_alg; + + /** Which algorithm to estimate circuit bandwidth with. Taken from + * consensus parameter during circuit setup. */ + bdp_alg_t bdp_alg; + + /** Algorithm-specific parameters. The specific struct that is used + * depends upon the algorithm selected by the cc_alg parameter. + * These should not be accessed anywhere other than the algorithm-specific + * files. */ + union { + struct westwood_params_t westwood_params; + struct vegas_params_t vegas_params; + struct nola_params_t nola_params; + }; +} congestion_control_t; + +/** + * Returns the number of sendme acks we will receive before we update cwnd. + * + * Congestion control literature recommends only one update of cwnd per + * cwnd worth of acks. However, we can also tune this to be more frequent + * by increasing the 'cc_cwnd_inc_rate' consensus parameter. + * + * If this returns 0 due to high cwnd_inc_rate, the calling code will + * update every sendme ack. + */ +static inline uint64_t CWND_UPDATE_RATE(const congestion_control_t *cc) +{ + /* We add cwnd_inc_rate*sendme_inc/2 to round to nearest integer number + * of acks */ + return ((cc->cwnd + cc->cwnd_inc_rate*cc->sendme_inc/2) + / (cc->cwnd_inc_rate*cc->sendme_inc)); +} + +/** + * Returns the amount to increment the congestion window each update, + * during slow start. + * + * Congestion control literature recommends either doubling the cwnd + * every cwnd during slow start, or some similar exponential growth + * (such as 50% more every cwnd, for Vegas). + * + * This is controlled by a consensus parameter 'cwnd_inc_pct_ss', which + * allows us to specify the percent of the current consensus window + * to update by. + */ +static inline uint64_t CWND_INC_SS(const congestion_control_t *cc) +{ + return (cc->cwnd_inc_pct_ss*cc->cwnd/100); +} + +/** + * Returns the amount to increment (and for Vegas, also decrement) the + * congestion window by, every update period. + * + * This is controlled by the cc_cwnd_inc consensus parameter. 
+ */ +#define CWND_INC(cc) ((cc)->cwnd_inc) + +#endif /* !defined(CONGESTION_CONTROL_ST_H) */ diff --git a/src/core/or/congestion_control_vegas.c b/src/core/or/congestion_control_vegas.c new file mode 100644 index 0000000000..3206821f4c --- /dev/null +++ b/src/core/or/congestion_control_vegas.c @@ -0,0 +1,200 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_vegas.c + * \brief Code that implements the TOR_VEGAS congestion control algorithm + * from Proposal #324. + */ + +#define TOR_CONGESTION_CONTROL_VEGAS_PRIVATE + +#include "core/or/or.h" + +#include "core/or/crypt_path.h" +#include "core/or/or_circuit_st.h" +#include "core/or/sendme.h" +#include "core/or/congestion_control_st.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_vegas.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "core/or/origin_circuit_st.h" +#include "core/or/channel.h" +#include "feature/nodelist/networkstatus.h" + +#define VEGAS_GAMMA(cc) (6*(cc)->sendme_inc) +#define VEGAS_ALPHA(cc) (3*(cc)->sendme_inc) +#define VEGAS_BETA(cc) (6*(cc)->sendme_inc) + +#define VEGAS_BDP_MIX_PCT 0 + +/** + * The original TCP Vegas used only a congestion window BDP estimator. We + * believe that the piecewise estimator is likely to perform better, but + * for purposes of experimentation, we might as well have a way to blend + * them. It also lets us set Vegas to its original estimator while other + * algorithms on the same network use piecewise (by setting the + * 'vegas_bdp_mix_pct' consensus parameter to 100, while leaving the + * 'cc_bdp_alg' parameter set to piecewise). + * + * Returns a percentage weighted average between the CWND estimator and + * the specified consensus BDP estimator. 
+ */ +static inline uint64_t +vegas_bdp_mix(const congestion_control_t *cc) +{ + return cc->vegas_params.bdp_mix_pct*cc->bdp[BDP_ALG_CWND_RTT]/100 + + (100-cc->vegas_params.bdp_mix_pct)*cc->bdp[cc->bdp_alg]/100; +} + +/** + * Cache Vegas consensus parameters. + */ +void +congestion_control_vegas_set_params(congestion_control_t *cc) +{ + tor_assert(cc->cc_alg == CC_ALG_VEGAS); + + cc->vegas_params.gamma = + networkstatus_get_param(NULL, "cc_vegas_gamma", + VEGAS_GAMMA(cc), + 0, + 1000); + + cc->vegas_params.alpha = + networkstatus_get_param(NULL, "cc_vegas_alpha", + VEGAS_ALPHA(cc), + 0, + 1000); + + cc->vegas_params.beta = + networkstatus_get_param(NULL, "cc_vegas_beta", + VEGAS_BETA(cc), + 0, + 1000); + + cc->vegas_params.bdp_mix_pct = + networkstatus_get_param(NULL, "cc_vegas_bdp_mix", + VEGAS_BDP_MIX_PCT, + 0, + 100); +} + +/** + * Process a SENDME and update the congestion window according to the + * rules specified in TOR_VEGAS of Proposal #324. + * + * Essentially, this algorithm attempts to measure queue lengths on + * the circuit by subtracting the bandwidth-delay-product estimate + * from the current congestion window. + * + * If the congestion window is larger than the bandwidth-delay-product, + * then data is assumed to be queuing. We reduce the congestion window + * in that case. + * + * If the congestion window is smaller than the bandwidth-delay-product, + * then there is spare bandwidth capacity on the circuit. We increase the + * congestion window in that case. + * + * The congestion window is updated only once every congestion window worth of + * packets, even if the signal persists. It is also updated whenever the + * upstream orcon blocks, or unblocks. This minimizes local client queues. 
+ */ +int +congestion_control_vegas_process_sendme(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + uint64_t queue_use; + + tor_assert(cc && cc->cc_alg == CC_ALG_VEGAS); + tor_assert(circ); + + /* Update ack counter until next congestion signal event is allowed */ + if (cc->next_cc_event) + cc->next_cc_event--; + + /* Compute BDP and RTT. If we did not update, don't run the alg */ + if (!congestion_control_update_circuit_estimates(cc, circ, layer_hint)) { + cc->inflight = cc->inflight - cc->sendme_inc; + return 0; + } + + /* We only update anything once per window */ + if (cc->next_cc_event == 0) { + /* The queue use is the amount in which our cwnd is above BDP; + * if it is below, then 0 queue use. */ + if (vegas_bdp_mix(cc) > cc->cwnd) + queue_use = 0; + else + queue_use = cc->cwnd - vegas_bdp_mix(cc); + + if (cc->in_slow_start) { + if (queue_use < cc->vegas_params.gamma && !cc->blocked_chan) { + /* Grow to BDP immediately, then exponential growth until + * congestion signal */ + cc->cwnd = MAX(cc->cwnd + CWND_INC_SS(cc), + vegas_bdp_mix(cc)); + } else { + /* Congestion signal: Fall back to Vegas equilibrium (BDP) */ + cc->cwnd = vegas_bdp_mix(cc); + cc->in_slow_start = 0; + log_info(LD_CIRC, "CC: TOR_VEGAS exiting slow start"); + } + } else { + if (queue_use > cc->vegas_params.beta || cc->blocked_chan) { + cc->cwnd -= CWND_INC(cc); + } else if (queue_use < cc->vegas_params.alpha) { + cc->cwnd += CWND_INC(cc); + } + } + + /* cwnd can never fall below 1 increment */ + cc->cwnd = MAX(cc->cwnd, cc->cwnd_min); + + /* Schedule next update */ + cc->next_cc_event = CWND_UPDATE_RATE(cc); + + if (CIRCUIT_IS_ORIGIN(circ)) { + log_info(LD_CIRC, + "CC: TOR_VEGAS Circuit %d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "VBDP: %"PRIu64", " + "QUSE: %"PRIu64", " + "NCCE: %"PRIu64", " + "SS: %d", + CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier, + cc->cwnd, + cc->inflight, + vegas_bdp_mix(cc), + queue_use, + cc->next_cc_event, + 
cc->in_slow_start + ); + } else { + log_info(LD_CIRC, + "CC: TOR_VEGAS Circuit %"PRIu64":%d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "VBDP: %"PRIu64", " + "QUSE: %"PRIu64", " + "NCCE: %"PRIu64", " + "SS: %d", + circ->n_chan->global_identifier, circ->n_circ_id, + cc->cwnd, + cc->inflight, + vegas_bdp_mix(cc), + queue_use, + cc->next_cc_event, + cc->in_slow_start + ); + } + } + + /* Update inflight with ack */ + cc->inflight = cc->inflight - cc->sendme_inc; + + return 0; +} diff --git a/src/core/or/congestion_control_vegas.h b/src/core/or/congestion_control_vegas.h new file mode 100644 index 0000000000..111345081c --- /dev/null +++ b/src/core/or/congestion_control_vegas.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_vegas.h + * \brief Private-ish APIs for the TOR_VEGAS congestion control algorithm + **/ + +#ifndef TOR_CONGESTION_CONTROL_VEGAS_H +#define TOR_CONGESTION_CONTROL_VEGAS_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" + +/* Processing SENDME cell. */ +int congestion_control_vegas_process_sendme(struct congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint); +void congestion_control_vegas_set_params(struct congestion_control_t *cc); + +/* Private section starts. */ +#ifdef TOR_CONGESTION_CONTROL_VEGAS_PRIVATE + +/* + * Unit tests declaractions. + */ +#ifdef TOR_UNIT_TESTS + +#endif /* defined(TOR_UNIT_TESTS) */ + +#endif /* defined(TOR_CONGESTION_CONTROL_VEGAS_PRIVATE) */ + +#endif /* !defined(TOR_CONGESTION_CONTROL_VEGAS_H) */ diff --git a/src/core/or/congestion_control_westwood.c b/src/core/or/congestion_control_westwood.c new file mode 100644 index 0000000000..4b24234212 --- /dev/null +++ b/src/core/or/congestion_control_westwood.c @@ -0,0 +1,231 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_westwood.c + * \brief Code that implements the TOR_WESTWOOD congestion control algorithm + * from Proposal #324. + */ + +#define TOR_CONGESTION_CONTROL_WESTWOOD_PRIVATE + +#include "core/or/or.h" + +#include "core/or/crypt_path.h" +#include "core/or/or_circuit_st.h" +#include "core/or/sendme.h" +#include "core/or/congestion_control_st.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_westwood.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "core/or/origin_circuit_st.h" +#include "core/or/channel.h" +#include "feature/nodelist/networkstatus.h" + +#define USEC_ONE_MS (1000) + +#define WESTWOOD_CWND_BACKOFF_M 75 +#define WESTWOOD_RTT_BACKOFF_M 100 +#define WESTWOOD_RTT_THRESH 33 +#define WESTWOOD_MIN_BACKOFF 0 + +/** + * Cache westwood consensus parameters. + */ +void +congestion_control_westwood_set_params(congestion_control_t *cc) +{ + tor_assert(cc->cc_alg == CC_ALG_WESTWOOD); + + cc->westwood_params.cwnd_backoff_m = + networkstatus_get_param(NULL, "cc_westwood_cwnd_m", + WESTWOOD_CWND_BACKOFF_M, + 0, + 100); + + cc->westwood_params.rtt_backoff_m = + networkstatus_get_param(NULL, "cc_westwood_rtt_m", + WESTWOOD_RTT_BACKOFF_M, + 50, + 100); + + cc->westwood_params.rtt_thresh = + networkstatus_get_param(NULL, "cc_westwood_rtt_thresh", + WESTWOOD_RTT_THRESH, + 0, + 100); + + cc->westwood_params.min_backoff = + networkstatus_get_param(NULL, "cc_westwood_min_backoff", + WESTWOOD_MIN_BACKOFF, + 0, + 1); +} + +/** + * Return the RTT threshold that signals congestion. + * + * Computed from the threshold parameter that specifies a + * percent between the min and max RTT observed so far. 
+ */ +static inline uint64_t +westwood_rtt_signal(const congestion_control_t *cc) +{ + return ((100 - cc->westwood_params.rtt_thresh)*cc->min_rtt_usec + + cc->westwood_params.rtt_thresh*(cc)->max_rtt_usec)/100; +} + +/** + * Compute a backoff to reduce the max RTT. + * + * This may be necessary to ensure that westwood does not have + * a runaway condition where congestion inflates the max RTT, which + * inflates the congestion threshold. That cannot happen with one + * Westwood instance, but it may happen in aggregate. Hence, this is + * a safety parameter, in case we need it. + */ +static inline uint64_t +westwood_rtt_max_backoff(const congestion_control_t *cc) +{ + return cc->min_rtt_usec + + (cc->westwood_params.rtt_backoff_m * + (cc->max_rtt_usec - cc->min_rtt_usec))/100; +} + +/** + * Returns true if the circuit is experiencing congestion, as per + * TOR_WESTWOOD rules. + */ +static inline bool +westwood_is_congested(const congestion_control_t *cc) +{ + /* If the local channel is blocked, that is always congestion */ + if (cc->blocked_chan) + return true; + + /* If the min RTT is within 1ms of the signal, then there is not enough + * range in RTTs to signify congestion. Treat that as not congested. */ + if (westwood_rtt_signal(cc) < cc->min_rtt_usec || + westwood_rtt_signal(cc) - cc->min_rtt_usec < USEC_ONE_MS) + return false; + + /* If the EWMA-smoothed RTT exceeds the westwood RTT threshold, + * then it is congestion. */ + if (cc->ewma_rtt_usec > westwood_rtt_signal(cc)) + return true; + + return false; +} + +/** + * Process a SENDME and update the congestion window according to the + * rules specified in TOR_WESTWOOD of Proposal #324. + * + * Essentially, this algorithm uses a threshold of 'rtt_thresh', which + * is a midpoint between the min and max RTT. If the RTT exceeds this + * threshold, then queue delay due to congestion is assumed to be present, + * and the algorithm reduces the congestion window. 
If the RTT is below the + * threshhold, the circuit is not congested (ie: queue delay is low), and we + * increase the congestion window. + * + * The congestion window is updated only once every congestion window worth of + * packets, even if the signal persists. It is also updated whenever the + * upstream orcon blocks, or unblocks. This minimizes local client queues. + */ +int +congestion_control_westwood_process_sendme(congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint) +{ + tor_assert(cc && cc->cc_alg == CC_ALG_WESTWOOD); + tor_assert(circ); + + /* Update ack counter until next congestion signal event is allowed */ + if (cc->next_cc_event) + cc->next_cc_event--; + + /* If we were unable to update our circuit estimates, Westwood must + * *not* update its cwnd, otherwise it could run to infinity, or to 0. + * Just update inflight from the sendme and return. */ + if (!congestion_control_update_circuit_estimates(cc, circ, layer_hint)) { + cc->inflight = cc->inflight - cc->sendme_inc; + return 0; + } + + /* We only update anything once per window */ + if (cc->next_cc_event == 0) { + if (!westwood_is_congested(cc)) { + if (cc->in_slow_start) { + cc->cwnd = MAX(cc->cwnd + CWND_INC_SS(cc), + cc->bdp[cc->bdp_alg]); + } else { + cc->cwnd = cc->cwnd + CWND_INC(cc); + } + } else { + if (cc->westwood_params.min_backoff) + cc->cwnd = MIN(cc->cwnd*cc->westwood_params.cwnd_backoff_m/100, + cc->bdp[cc->bdp_alg]); + else + cc->cwnd = MAX(cc->cwnd*cc->westwood_params.cwnd_backoff_m/100, + cc->bdp[cc->bdp_alg]); + + cc->in_slow_start = 0; + + // Because Westwood's congestion can runaway and boost max rtt, + // which increases its congestion signal, we backoff the max rtt + // too. + cc->max_rtt_usec = westwood_rtt_max_backoff(cc); + + log_info(LD_CIRC, "CC: TOR_WESTWOOD congestion. 
New max RTT: %"PRIu64, + cc->max_rtt_usec/1000); + } + + /* cwnd can never fall below 1 increment */ + cc->cwnd = MAX(cc->cwnd, cc->cwnd_min); + + /* Schedule next update */ + cc->next_cc_event = CWND_UPDATE_RATE(cc); + + if (CIRCUIT_IS_ORIGIN(circ)) { + log_info(LD_CIRC, + "CC: TOR_WESTWOOD Circuit %d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "NCCE: %"PRIu64", " + "WRTT: %"PRIu64", " + "WSIG: %"PRIu64", " + "SS: %d", + CONST_TO_ORIGIN_CIRCUIT(circ)->global_identifier, + cc->cwnd, + cc->inflight, + cc->next_cc_event, + cc->ewma_rtt_usec/1000, + westwood_rtt_signal(cc)/1000, + cc->in_slow_start + ); + } else { + log_info(LD_CIRC, + "CC: TOR_WESTWOOD Circuit %"PRIu64":%d " + "CWND: %"PRIu64", " + "INFL: %"PRIu64", " + "NCCE: %"PRIu64", " + "WRTT: %"PRIu64", " + "WSIG: %"PRIu64", " + "SS: %d", + circ->n_chan->global_identifier, circ->n_circ_id, + cc->cwnd, + cc->inflight, + cc->next_cc_event, + cc->ewma_rtt_usec/1000, + westwood_rtt_signal(cc)/1000, + cc->in_slow_start + ); + } + } + + /* Update inflight with ack */ + cc->inflight = cc->inflight - cc->sendme_inc; + + return 0; +} diff --git a/src/core/or/congestion_control_westwood.h b/src/core/or/congestion_control_westwood.h new file mode 100644 index 0000000000..c6fd596df4 --- /dev/null +++ b/src/core/or/congestion_control_westwood.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2019-2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file congestion_control_westwood.h + * \brief Private-ish APIs for the TOR_WESTWOOD congestion control algorithm + **/ + +#ifndef TOR_CONGESTION_CONTROL_WESTWOOD_H +#define TOR_CONGESTION_CONTROL_WESTWOOD_H + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" + +/* Processing SENDME cell. */ +int congestion_control_westwood_process_sendme(struct congestion_control_t *cc, + const circuit_t *circ, + const crypt_path_t *layer_hint); +void congestion_control_westwood_set_params(struct congestion_control_t *cc); + +/* Private section starts. 
*/ +#ifdef TOR_CONGESTION_CONTROL_WESTWOOD_PRIVATE + +/* + * Unit tests declaractions. + */ +#ifdef TOR_UNIT_TESTS + +#endif /* defined(TOR_UNIT_TESTS) */ + +#endif /* defined(TOR_CONGESTION_CONTROL_WESTWOOD_PRIVATE) */ + +#endif /* !defined(TOR_CONGESTION_CONTROL_WESTWOOD_H) */ diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c index d3979b3a7e..ea4bf00735 100644 --- a/src/core/or/connection_edge.c +++ b/src/core/or/connection_edge.c @@ -69,6 +69,8 @@ #include "core/or/circuituse.h" #include "core/or/circuitpadding.h" #include "core/or/connection_edge.h" +#include "core/or/congestion_control_flow.h" +#include "core/or/circuitstats.h" #include "core/or/connection_or.h" #include "core/or/extendinfo.h" #include "core/or/policies.h" @@ -614,20 +616,39 @@ connection_half_edge_add(const edge_connection_t *conn, half_conn->stream_id = conn->stream_id; - // How many sendme's should I expect? - half_conn->sendmes_pending = - (STREAMWINDOW_START-conn->package_window)/STREAMWINDOW_INCREMENT; - // Is there a connected cell pending? half_conn->connected_pending = conn->base_.state == AP_CONN_STATE_CONNECT_WAIT; - /* Data should only arrive if we're not waiting on a resolved cell. - * It can arrive after waiting on connected, because of optimistic - * data. */ - if (conn->base_.state != AP_CONN_STATE_RESOLVE_WAIT) { - // How many more data cells can arrive on this id? - half_conn->data_pending = conn->deliver_window; + if (edge_uses_flow_control(conn)) { + /* If the edge uses the new congestion control flow control, we must use + * time-based limits on half-edge activity. 
*/ + uint64_t timeout_usec = (uint64_t)(get_circuit_build_timeout_ms()*1000); + half_conn->used_ccontrol = 1; + + /* If this is an onion service circuit, double the CBT as an approximate + * value for the other half of the circuit */ + if (conn->hs_ident) { + timeout_usec *= 2; + } + + /* The stream should stop seeing any use after the larger of the circuit + * RTT and the overall circuit build timeout */ + half_conn->end_ack_expected_usec = MAX(timeout_usec, + edge_get_max_rtt(conn)) + + monotime_absolute_usec(); + } else { + // How many sendme's should I expect? + half_conn->sendmes_pending = + (STREAMWINDOW_START-conn->package_window)/STREAMWINDOW_INCREMENT; + + /* Data should only arrive if we're not waiting on a resolved cell. + * It can arrive after waiting on connected, because of optimistic + * data. */ + if (conn->base_.state != AP_CONN_STATE_RESOLVE_WAIT) { + // How many more data cells can arrive on this id? + half_conn->data_pending = conn->deliver_window; + } } insert_at = smartlist_bsearch_idx(circ->half_streams, &half_conn->stream_id, @@ -688,6 +709,12 @@ connection_half_edge_is_valid_data(const smartlist_t *half_conns, if (!half) return 0; + if (half->used_ccontrol) { + if (monotime_absolute_usec() > half->end_ack_expected_usec) + return 0; + return 1; + } + if (half->data_pending > 0) { half->data_pending--; return 1; @@ -740,6 +767,10 @@ connection_half_edge_is_valid_sendme(const smartlist_t *half_conns, if (!half) return 0; + /* congestion control edges don't use sendmes */ + if (half->used_ccontrol) + return 0; + if (half->sendmes_pending > 0) { half->sendmes_pending--; return 1; @@ -1269,15 +1300,6 @@ connection_ap_rescan_and_attach_pending(void) connection_ap_attach_pending(1); } -#ifdef DEBUGGING_17659 -#define UNMARK() do { \ - entry_conn->marked_pending_circ_line = 0; \ - entry_conn->marked_pending_circ_file = 0; \ - } while (0) -#else /* !defined(DEBUGGING_17659) */ -#define UNMARK() do { } while (0) -#endif /* defined(DEBUGGING_17659) */ 
- /** Tell any AP streams that are listed as waiting for a new circuit to try * again. If there is an available circuit for a stream, attach it. Otherwise, * launch a new circuit. @@ -1306,21 +1328,18 @@ connection_ap_attach_pending(int retry) connection_t *conn = ENTRY_TO_CONN(entry_conn); tor_assert(conn && entry_conn); if (conn->marked_for_close) { - UNMARK(); continue; } if (conn->magic != ENTRY_CONNECTION_MAGIC) { log_warn(LD_BUG, "%p has impossible magic value %u.", entry_conn, (unsigned)conn->magic); - UNMARK(); continue; } if (conn->state != AP_CONN_STATE_CIRCUIT_WAIT) { - log_warn(LD_BUG, "%p is no longer in circuit_wait. Its current state " - "is %s. Why is it on pending_entry_connections?", - entry_conn, - conn_state_to_string(conn->type, conn->state)); - UNMARK(); + /* The connection_ap_handshake_attach_circuit() call, for onion service, + * can lead to more than one connections in the "pending" list to change + * state and so it is OK to get here. Ignore it because this connection + * won't be in pending_entry_connections list after this point. */ continue; } @@ -1345,7 +1364,6 @@ connection_ap_attach_pending(int retry) /* If we got here, then we either closed the connection, or * we attached it. */ - UNMARK(); } SMARTLIST_FOREACH_END(entry_conn); smartlist_free(pending); @@ -1416,7 +1434,6 @@ connection_ap_mark_as_non_pending_circuit(entry_connection_t *entry_conn) { if (PREDICT_UNLIKELY(NULL == pending_entry_connections)) return; - UNMARK(); smartlist_remove(pending_entry_connections, entry_conn); } @@ -1612,23 +1629,6 @@ consider_plaintext_ports(entry_connection_t *conn, uint16_t port) return 0; } -/** Return true iff <b>query</b> is a syntactically valid service ID (as - * generated by rend_get_service_id). */ -static int -rend_valid_v2_service_id(const char *query) -{ - /** Length of 'y' portion of 'y.onion' URL. 
*/ -#define REND_SERVICE_ID_LEN_BASE32 16 - - if (strlen(query) != REND_SERVICE_ID_LEN_BASE32) - return 0; - - if (strspn(query, BASE32_CHARS) != REND_SERVICE_ID_LEN_BASE32) - return 0; - - return 1; -} - /** Parse the given hostname in address. Returns true if the parsing was * successful and type_out contains the type of the hostname. Else, false is * returned which means it was not recognized and type_out is set to @@ -1692,14 +1692,6 @@ parse_extended_hostname(char *address, hostname_type_t *type_out) if (q != address) { memmove(address, q, strlen(q) + 1 /* also get \0 */); } - /* v2 onion address check. */ - if (strlen(query) == REND_SERVICE_ID_LEN_BASE32) { - *type_out = ONION_V2_HOSTNAME; - if (rend_valid_v2_service_id(query)) { - goto success; - } - goto failed; - } /* v3 onion address check. */ if (strlen(query) == HS_SERVICE_ADDR_LEN_BASE32) { @@ -1719,8 +1711,7 @@ parse_extended_hostname(char *address, hostname_type_t *type_out) failed: /* otherwise, return to previous state and return 0 */ *s = '.'; - const bool is_onion = (*type_out == ONION_V2_HOSTNAME) || - (*type_out == ONION_V3_HOSTNAME); + const bool is_onion = (*type_out == ONION_V3_HOSTNAME); log_warn(LD_APP, "Invalid %shostname %s; rejecting", is_onion ? "onion " : "", safe_str_client(address)); @@ -2242,7 +2233,7 @@ connection_ap_handshake_rewrite_and_attach(entry_connection_t *conn, } /* Now, we handle everything that isn't a .onion address. */ - if (addresstype != ONION_V3_HOSTNAME && addresstype != ONION_V2_HOSTNAME) { + if (addresstype != ONION_V3_HOSTNAME) { /* Not a hidden-service request. It's either a hostname or an IP, * possibly with a .exit that we stripped off. We're going to check * if we're allowed to connect/resolve there, and then launch the @@ -2527,28 +2518,6 @@ connection_ap_handshake_rewrite_and_attach(entry_connection_t *conn, return 0; } else { /* If we get here, it's a request for a .onion address! */ - - /* We don't support v2 onions anymore. Log a warning and bail. 
*/ - if (addresstype == ONION_V2_HOSTNAME) { - static bool log_once = false; - if (!log_once) { - log_warn(LD_PROTOCOL, "Tried to connect to a v2 onion address, but " - "this version of Tor no longer supports them. Please " - "encourage the site operator to upgrade. For more " - "information see " - "https://blog.torproject.org/v2-deprecation-timeline."); - log_once = true; - } - control_event_client_status(LOG_WARN, "SOCKS_BAD_HOSTNAME HOSTNAME=%s", - escaped(socks->address)); - /* Send back the 0xF6 extended code indicating a bad hostname. This is - * mostly so Tor Browser can make a proper UX with regards to v2 - * addresses. */ - conn->socks_request->socks_extended_error_code = SOCKS5_HS_BAD_ADDRESS; - connection_mark_unattached_ap(conn, END_STREAM_REASON_TORPROTOCOL); - return -1; - } - tor_assert(addresstype == ONION_V3_HOSTNAME); tor_assert(!automap); return connection_ap_handle_onion(conn, socks, circ); diff --git a/src/core/or/connection_edge.h b/src/core/or/connection_edge.h index 72869f348b..966a9391d8 100644 --- a/src/core/or/connection_edge.h +++ b/src/core/or/connection_edge.h @@ -80,7 +80,6 @@ typedef enum hostname_type_t { BAD_HOSTNAME, EXIT_HOSTNAME, NORMAL_HOSTNAME, - ONION_V2_HOSTNAME, ONION_V3_HOSTNAME, } hostname_type_t; diff --git a/src/core/or/connection_or.c b/src/core/or/connection_or.c index dd31638eb3..db9f93e6f6 100644 --- a/src/core/or/connection_or.c +++ b/src/core/or/connection_or.c @@ -65,6 +65,7 @@ #include "core/or/scheduler.h" #include "feature/nodelist/torcert.h" #include "core/or/channelpadding.h" +#include "core/or/congestion_control_common.h" #include "feature/dirauth/authmode.h" #include "feature/hs/hs_service.h" @@ -636,7 +637,7 @@ connection_or_flushed_some(or_connection_t *conn) /* If we're under the low water mark, add cells until we're just over the * high water mark. 
*/ datalen = connection_get_outbuf_len(TO_CONN(conn)); - if (datalen < OR_CONN_LOWWATER) { + if (datalen < or_conn_lowwatermark()) { /* Let the scheduler know */ scheduler_channel_wants_writes(TLS_CHAN_TO_BASE(conn->chan)); } @@ -660,9 +661,9 @@ connection_or_num_cells_writeable(or_connection_t *conn) * used to trigger when to start writing after we've stopped. */ datalen = connection_get_outbuf_len(TO_CONN(conn)); - if (datalen < OR_CONN_HIGHWATER) { + if (datalen < or_conn_highwatermark()) { cell_network_size = get_cell_network_size(conn->wide_circ_ids); - n = CEIL_DIV(OR_CONN_HIGHWATER - datalen, cell_network_size); + n = CEIL_DIV(or_conn_highwatermark() - datalen, cell_network_size); } return n; diff --git a/src/core/or/crypt_path.c b/src/core/or/crypt_path.c index 29356d7c2a..7673bc306f 100644 --- a/src/core/or/crypt_path.c +++ b/src/core/or/crypt_path.c @@ -27,6 +27,7 @@ #include "core/or/circuitbuild.h" #include "core/or/circuitlist.h" #include "core/or/extendinfo.h" +#include "core/or/congestion_control_common.h" #include "lib/crypt_ops/crypto_dh.h" #include "lib/crypt_ops/crypto_util.h" @@ -165,6 +166,7 @@ cpath_free(crypt_path_t *victim) onion_handshake_state_release(&victim->handshake_state); crypto_dh_free(victim->rend_dh_handshake_state); extend_info_free(victim->extend_info); + congestion_control_free(victim->ccontrol); memwipe(victim, 0xBB, sizeof(crypt_path_t)); /* poison memory */ tor_free(victim); diff --git a/src/core/or/crypt_path_st.h b/src/core/or/crypt_path_st.h index 2529b6ee41..ddc85eec14 100644 --- a/src/core/or/crypt_path_st.h +++ b/src/core/or/crypt_path_st.h @@ -29,6 +29,8 @@ struct onion_handshake_state_t { } u; }; +struct congestion_control_t; + /** Macro to encapsulate private members of a struct. * * Renames 'x' to 'x_crypt_path_private_field'. @@ -80,6 +82,9 @@ struct crypt_path_t { int deliver_window; /**< How many cells are we willing to deliver originating * at this step? 
*/ + /** Congestion control info */ + struct congestion_control_t *ccontrol; + /*********************** Private members ****************************/ /** Private member: Cryptographic state used for encrypting and diff --git a/src/core/or/edge_connection_st.h b/src/core/or/edge_connection_st.h index 0120c3df25..dab32fc8d0 100644 --- a/src/core/or/edge_connection_st.h +++ b/src/core/or/edge_connection_st.h @@ -15,6 +15,7 @@ #include "core/or/or.h" #include "core/or/connection_st.h" +#include "lib/evloop/token_bucket.h" /** Subtype of connection_t for an "edge connection" -- that is, an entry (ap) * connection, or an exit. */ @@ -73,6 +74,60 @@ struct edge_connection_t { * that's going away and being used on channels instead. We still tag * edge connections with dirreq_id from circuits, so it's copied here. */ uint64_t dirreq_id; + + /* The following are flow control fields */ + + /** Used for rate limiting the read side of this edge connection when + * congestion control is enabled on its circuit. The XON cell ewma_drain_rate + * parameter is used to set the bucket limits. */ + token_bucket_rw_t bucket; + + /** + * Monotime timestamp of the last time we sent a flow control message + * for this edge, used to compute advisory rates */ + uint64_t drain_start_usec; + + /** + * Number of bytes written since we either emptied our buffers, + * or sent an advisory drate rate. Can wrap, buf if so, + * we must reset the usec timestamp above. (Or make this u64, idk). + */ + uint32_t drained_bytes; + uint32_t prev_drained_bytes; + + /** + * N_EWMA of the drain rate of writes on this edge conn + * while buffers were present. + */ + uint32_t ewma_drain_rate; + + /** + * The ewma drain rate the last time we sent an xon. 
+ */ + uint32_t ewma_rate_last_sent; + + /** + * The following fields are used to count the total bytes sent on this + * stream, and compare them to the number of XON and XOFFs received, so + * that clients can check rate limits of XOFF/XON to prevent dropmark + * attacks. */ + uint32_t total_bytes_xmit; + + /** Number of XOFFs received */ + uint8_t num_xoff_recv; + + /** Number of XONs received */ + uint8_t num_xon_recv; + + /** + * Flag that tells us if an XOFF has been sent; cleared when we send an XON. + * Used to avoid sending multiple XOFFs */ + uint8_t xoff_sent : 1; + + /** Flag that tells us if an XOFF has been received; cleared when we get + * an XON. Used to ensure that this edge keeps reads on its edge socket + * disabled. */ + uint8_t xoff_received : 1; }; #endif /* !defined(EDGE_CONNECTION_ST_H) */ diff --git a/src/core/or/half_edge_st.h b/src/core/or/half_edge_st.h index c956c7434a..ac97eb19f1 100644 --- a/src/core/or/half_edge_st.h +++ b/src/core/or/half_edge_st.h @@ -31,6 +31,18 @@ typedef struct half_edge_t { * our deliver window */ int data_pending; + + /** + * Monotime timestamp of when the other end should have successfully + * shut down the stream and stop sending data, based on the larger + * of circuit RTT and CBT. Used if 'used_ccontrol' is true, to expire + * the half_edge at this monotime timestamp. */ + uint64_t end_ack_expected_usec; + + /** + * Did this edge use congestion control? If so, use + * timer instead of pending data approach */ + int used_ccontrol : 1; + + /** Is there a connected cell pending? 
*/ int connected_pending : 1; } half_edge_t; diff --git a/src/core/or/include.am b/src/core/or/include.am index b578b75673..b08f8509cc 100644 --- a/src/core/or/include.am +++ b/src/core/or/include.am @@ -28,13 +28,17 @@ LIBTOR_APP_A_SOURCES += \ src/core/or/orconn_event.c \ src/core/or/policies.c \ src/core/or/protover.c \ - src/core/or/protover_rust.c \ src/core/or/reasons.c \ src/core/or/relay.c \ src/core/or/scheduler.c \ src/core/or/scheduler_kist.c \ src/core/or/scheduler_vanilla.c \ src/core/or/sendme.c \ + src/core/or/congestion_control_common.c \ + src/core/or/congestion_control_vegas.c \ + src/core/or/congestion_control_nola.c \ + src/core/or/congestion_control_westwood.c \ + src/core/or/congestion_control_flow.c \ src/core/or/status.c \ src/core/or/versions.c @@ -57,6 +61,7 @@ noinst_HEADERS += \ src/core/or/circuitpadding_machines.h \ src/core/or/circuituse.h \ src/core/or/command.h \ + src/core/or/congestion_control_st.h \ src/core/or/connection_edge.h \ src/core/or/connection_or.h \ src/core/or/connection_st.h \ @@ -77,6 +82,7 @@ noinst_HEADERS += \ src/core/or/entry_port_cfg_st.h \ src/core/or/extend_info_st.h \ src/core/or/listener_connection_st.h \ + src/core/or/lttng_cc.inc \ src/core/or/lttng_circuit.inc \ src/core/or/onion.h \ src/core/or/or.h \ @@ -97,6 +103,11 @@ noinst_HEADERS += \ src/core/or/relay_crypto_st.h \ src/core/or/scheduler.h \ src/core/or/sendme.h \ + src/core/or/congestion_control_flow.h \ + src/core/or/congestion_control_common.h \ + src/core/or/congestion_control_vegas.h \ + src/core/or/congestion_control_nola.h \ + src/core/or/congestion_control_westwood.h \ src/core/or/server_port_cfg_st.h \ src/core/or/socks_request_st.h \ src/core/or/status.h \ @@ -106,7 +117,9 @@ noinst_HEADERS += \ if USE_TRACING_INSTRUMENTATION_LTTNG LIBTOR_APP_A_SOURCES += \ + src/core/or/trace_probes_cc.c \ src/core/or/trace_probes_circuit.c noinst_HEADERS += \ + src/core/or/trace_probes_cc.h \ src/core/or/trace_probes_circuit.h endif diff --git 
a/src/core/or/lttng_cc.inc b/src/core/or/lttng_cc.inc new file mode 100644 index 0000000000..b7bf58e196 --- /dev/null +++ b/src/core/or/lttng_cc.inc @@ -0,0 +1,166 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file lttng_cc.inc + * \brief LTTng tracing probe declaration for the congestion control subsystem. + * It is in this .inc file due to the non C standard syntax and the way + * we guard the header with the LTTng specific + * TRACEPOINT_HEADER_MULTI_READ. + **/ + +#include "orconfig.h" + +/* We only build the following if LTTng instrumentation has been enabled. */ +#ifdef USE_TRACING_INSTRUMENTATION_LTTNG + +/* The following defines are LTTng-UST specific. */ +#undef TRACEPOINT_PROVIDER +#define TRACEPOINT_PROVIDER tor_cc + +#undef TRACEPOINT_INCLUDE +#define TRACEPOINT_INCLUDE "./src/core/or/lttng_cc.inc" + +#if !defined(LTTNG_CC_INC) || defined(TRACEPOINT_HEADER_MULTI_READ) +#define LTTNG_CC_INC + +#include <lttng/tracepoint.h> + +/* + * Flow Control + */ + +/* Emitted everytime the flow_control_decide_xon() function is called. */ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon, + TP_ARGS(const edge_connection_t *, stream, size_t, n_written), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(size_t, written_bytes, n_written) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* Emitted when flow control starts measuring the drain rate. 
*/ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon_drain_start, + TP_ARGS(const edge_connection_t *, stream), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* Emitted when the drain rate is updated. The new_drain_rate value is what was + * just computed. */ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon_drain_update, + TP_ARGS(const edge_connection_t *, stream, uint32_t, drain_rate), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, new_drain_rate, drain_rate) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* Emitted when an XON cell is sent due to a notice in a drain rate change. 
*/ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon_rate_change, + TP_ARGS(const edge_connection_t *, stream), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* Emitted when an XON cell is sent because we partially or fully drained the + * edge connection buffer. */ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon_partial_drain, + TP_ARGS(const edge_connection_t *, stream), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* Emitted when we double the drain rate which is an attempt to see if we can + * speed things up. */ +TRACEPOINT_EVENT(tor_cc, flow_decide_xon_drain_doubled, + TP_ARGS(const edge_connection_t *, stream), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* XOFF */ + +/* Emitted when we send an XOFF cell. 
*/ +TRACEPOINT_EVENT(tor_cc, flow_decide_xoff_sending, + TP_ARGS(const edge_connection_t *, stream), + TP_FIELDS( + ctf_integer(uint64_t, stream_id, TO_CONN(stream)->global_identifier) + ctf_integer(uint32_t, drained_bytes_current, stream->drained_bytes) + ctf_integer(uint32_t, drained_bytes_previous, stream->prev_drained_bytes) + ctf_integer(uint32_t, ewma_drain_rate_last, stream->ewma_rate_last_sent) + ctf_integer(uint32_t, ewma_drain_rate_current, stream->ewma_drain_rate) + ctf_integer(size_t, outbuf_len, + connection_get_outbuf_len(TO_CONN(stream))) + ) +) + +/* + * Congestion Control + */ + +/* Emitted when the BDP value has been updated. */ +TRACEPOINT_EVENT(tor_cc, bdp_update, + TP_ARGS(const circuit_t *, circ, const congestion_control_t *, cc, + uint64_t, curr_rtt_usec, uint64_t, sendme_rate_bdp), + TP_FIELDS( + ctf_integer(uint64_t, circuit_ptr, circ) + ctf_integer(uint32_t, n_circ_id, circ->n_circ_id) + ctf_integer(uint64_t, min_rtt_usec, cc->min_rtt_usec) + ctf_integer(uint64_t, curr_rtt_usec, curr_rtt_usec) + ctf_integer(uint64_t, ewma_rtt_usec, cc->ewma_rtt_usec) + ctf_integer(uint64_t, max_rtt_usec, cc->max_rtt_usec) + ctf_integer(uint64_t, bdp_inflight_rtt, cc->bdp[BDP_ALG_INFLIGHT_RTT]) + ctf_integer(uint64_t, bdp_cwnd_rtt, cc->bdp[BDP_ALG_CWND_RTT]) + ctf_integer(uint64_t, bdp_sendme_rate, cc->bdp[BDP_ALG_SENDME_RATE]) + ctf_integer(uint64_t, bdp_piecewise, cc->bdp[BDP_ALG_PIECEWISE]) + ctf_integer(uint64_t, sendme_rate_bdp, sendme_rate_bdp) + ) +) + +#endif /* LTTNG_CC_INC || TRACEPOINT_HEADER_MULTI_READ */ + +/* Must be included after the probes declaration. 
*/ +#include <lttng/tracepoint-event.h> + +#endif /* USE_TRACING_INSTRUMENTATION_LTTNG */ diff --git a/src/core/or/or.h b/src/core/or/or.h index 99948f26e2..392a848ee7 100644 --- a/src/core/or/or.h +++ b/src/core/or/or.h @@ -210,6 +210,9 @@ struct curve25519_public_key_t; #define RELAY_COMMAND_PADDING_NEGOTIATE 41 #define RELAY_COMMAND_PADDING_NEGOTIATED 42 +#define RELAY_COMMAND_XOFF 43 +#define RELAY_COMMAND_XON 44 + /* Reasons why an OR connection is closed. */ #define END_OR_CONN_REASON_DONE 1 #define END_OR_CONN_REASON_REFUSED 2 /* connection refused */ @@ -591,18 +594,6 @@ typedef struct or_handshake_state_t or_handshake_state_t; /** Length of Extended ORPort connection identifier. */ #define EXT_OR_CONN_ID_LEN DIGEST_LEN /* 20 */ -/* - * OR_CONN_HIGHWATER and OR_CONN_LOWWATER moved from connection_or.c so - * channeltls.c can see them too. - */ - -/** When adding cells to an OR connection's outbuf, keep adding until the - * outbuf is at least this long, or we run out of cells. */ -#define OR_CONN_HIGHWATER (32*1024) - -/** Add cells to an OR connection's outbuf whenever the outbuf's data length - * drops below this size. */ -#define OR_CONN_LOWWATER (16*1024) typedef struct connection_t connection_t; typedef struct control_connection_t control_connection_t; diff --git a/src/core/or/or_circuit_st.h b/src/core/or/or_circuit_st.h index b8fbf9658e..11695ec301 100644 --- a/src/core/or/or_circuit_st.h +++ b/src/core/or/or_circuit_st.h @@ -52,6 +52,10 @@ struct or_circuit_t { /** Stores KH for the handshake. */ char rend_circ_nonce[DIGEST_LEN];/* KH in tor-spec.txt */ + /** Number of cells which we have discarded because of having no next hop, + * despite not recognizing the cell. */ + uint32_t n_cells_discarded_at_end; + /** How many more relay_early cells can we send on this circuit, according * to the specification? 
*/ unsigned int remaining_relay_early_cells : 4; @@ -93,4 +97,3 @@ struct or_circuit_t { }; #endif /* !defined(OR_CIRCUIT_ST_H) */ - diff --git a/src/core/or/origin_circuit_st.h b/src/core/or/origin_circuit_st.h index 9264077c50..6c86a56000 100644 --- a/src/core/or/origin_circuit_st.h +++ b/src/core/or/origin_circuit_st.h @@ -180,6 +180,12 @@ struct origin_circuit_t { unsigned first_hop_from_controller : 1; /** + * If true, this circuit's path has been chosen, in full or in part, + * by the controller API, and it's okay to ignore checks that we'd + * usually do on the path as whole. */ + unsigned int any_hop_from_controller : 1; + + /** * Tristate variable to guard against pathbias miscounting * due to circuit purpose transitions changing the decision * of pathbias_should_count(). This variable is informational diff --git a/src/core/or/policies.c b/src/core/or/policies.c index f91c23ad31..a53849b4d0 100644 --- a/src/core/or/policies.c +++ b/src/core/or/policies.c @@ -59,6 +59,9 @@ static smartlist_t *authdir_invalid_policy = NULL; /** Policy that addresses for incoming router descriptors must <b>not</b> * match in order to not be marked as BadExit. */ static smartlist_t *authdir_badexit_policy = NULL; +/** Policy that addresses for incoming router descriptors must <b>not</b> + * match in order to not be marked as MiddleOnly. */ +static smartlist_t *authdir_middleonly_policy = NULL; /** Parsed addr_policy_t describing which addresses we believe we can start * circuits at. */ @@ -1119,6 +1122,17 @@ authdir_policy_badexit_address(const tor_addr_t *addr, uint16_t port) return addr_is_in_cc_list(addr, get_options()->AuthDirBadExitCCs); } +/** Return 1 if <b>addr</b>:<b>port</b> should be marked as MiddleOnly, + * based on <b>authdir_middleonly_policy</b>. Else return 0. 
+ */ +int +authdir_policy_middleonly_address(const tor_addr_t *addr, uint16_t port) +{ + if (!addr_policy_permits_tor_addr(addr, port, authdir_middleonly_policy)) + return 1; + return addr_is_in_cc_list(addr, get_options()->AuthDirMiddleOnlyCCs); +} + #define REJECT(arg) \ STMT_BEGIN *msg = tor_strdup(arg); goto err; STMT_END @@ -1173,6 +1187,9 @@ validate_addr_policies(const or_options_t *options, char **msg) if (parse_addr_policy(options->AuthDirBadExit, &addr_policy, ADDR_POLICY_REJECT)) REJECT("Error in AuthDirBadExit entry."); + if (parse_addr_policy(options->AuthDirMiddleOnly, &addr_policy, + ADDR_POLICY_REJECT)) + REJECT("Error in AuthDirMiddleOnly entry."); if (parse_addr_policy(options->ReachableAddresses, &addr_policy, ADDR_POLICY_ACCEPT)) @@ -1266,6 +1283,9 @@ policies_parse_from_options(const or_options_t *options) if (load_policy_from_option(options->AuthDirBadExit, "AuthDirBadExit", &authdir_badexit_policy, ADDR_POLICY_REJECT) < 0) ret = -1; + if (load_policy_from_option(options->AuthDirMiddleOnly, "AuthDirMiddleOnly", + &authdir_middleonly_policy, ADDR_POLICY_REJECT) < 0) + ret = -1; if (parse_metrics_port_policy(options) < 0) { ret = -1; } @@ -3112,6 +3132,8 @@ policies_free_all(void) authdir_invalid_policy = NULL; addr_policy_list_free(authdir_badexit_policy); authdir_badexit_policy = NULL; + addr_policy_list_free(authdir_middleonly_policy); + authdir_middleonly_policy = NULL; if (!HT_EMPTY(&policy_root)) { policy_map_ent_t **ent; diff --git a/src/core/or/policies.h b/src/core/or/policies.h index a32f50ab1d..e11e1d0ff5 100644 --- a/src/core/or/policies.h +++ b/src/core/or/policies.h @@ -106,6 +106,7 @@ int metrics_policy_permits_address(const tor_addr_t *addr); int authdir_policy_permits_address(const tor_addr_t *addr, uint16_t port); int authdir_policy_valid_address(const tor_addr_t *addr, uint16_t port); int authdir_policy_badexit_address(const tor_addr_t *addr, uint16_t port); +int authdir_policy_middleonly_address(const tor_addr_t *addr, 
uint16_t port); int validate_addr_policies(const or_options_t *options, char **msg); void policy_expand_private(smartlist_t **policy); diff --git a/src/core/or/protover.c b/src/core/or/protover.c index 04df8aeeb8..199fc830a0 100644 --- a/src/core/or/protover.c +++ b/src/core/or/protover.c @@ -28,8 +28,6 @@ #include "core/or/versions.h" #include "lib/tls/tortls.h" -#ifndef HAVE_RUST - static const smartlist_t *get_supported_protocol_list(void); static int protocol_list_contains(const smartlist_t *protos, protocol_type_t pr, uint32_t ver); @@ -855,5 +853,3 @@ protover_free_all(void) supported_protocol_list = NULL; } } - -#endif /* !defined(HAVE_RUST) */ diff --git a/src/core/or/protover.h b/src/core/or/protover.h index c0739a092e..ae258d74a5 100644 --- a/src/core/or/protover.h +++ b/src/core/or/protover.h @@ -103,13 +103,13 @@ typedef struct proto_entry_t { uint64_t bitmask; } proto_entry_t; -#if !defined(HAVE_RUST) && defined(TOR_UNIT_TESTS) +#if defined(TOR_UNIT_TESTS) STATIC struct smartlist_t *parse_protocol_list(const char *s); STATIC char *encode_protocol_list(const struct smartlist_t *sl); STATIC const char *protocol_type_to_str(protocol_type_t pr); STATIC int str_to_protocol_type(const char *s, protocol_type_t *pr_out); STATIC void proto_entry_free_(proto_entry_t *entry); -#endif /* !defined(HAVE_RUST) && defined(TOR_UNIT_TESTS) */ +#endif /* defined(TOR_UNIT_TESTS) */ #define proto_entry_free(entry) \ FREE_AND_NULL(proto_entry_t, proto_entry_free_, (entry)) diff --git a/src/core/or/protover_rust.c b/src/core/or/protover_rust.c deleted file mode 100644 index 31ddfa1bdf..0000000000 --- a/src/core/or/protover_rust.c +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2016-2021, The Tor Project, Inc. */ -/* See LICENSE for licensing information */ - -/* - * \file protover_rust.c - * \brief Provide a C wrapper for functions exposed in /src/rust/protover, - * and safe translation/handling between the Rust/C boundary. 
- */ - -#include "core/or/or.h" -#include "core/or/protover.h" - -#ifdef HAVE_RUST - -/* Define for compatibility, used in main.c */ -void -protover_free_all(void) -{ -} - -int protover_contains_long_protocol_names_(const char *s); - -/** - * Return true if the unparsed protover in <b>s</b> would contain a protocol - * name longer than MAX_PROTOCOL_NAME_LENGTH, and false otherwise. - */ -bool -protover_list_is_invalid(const char *s) -{ - return protover_contains_long_protocol_names_(s) != 0; -} - -#endif /* defined(HAVE_RUST) */ - diff --git a/src/core/or/relay.c b/src/core/or/relay.c index f5a9e73856..68fddd1ae7 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -97,6 +97,8 @@ #include "feature/nodelist/routerinfo_st.h" #include "core/or/socks_request_st.h" #include "core/or/sendme.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_flow.h" static edge_connection_t *relay_lookup_conn(circuit_t *circ, cell_t *cell, cell_direction_t cell_direction, @@ -115,13 +117,6 @@ static void adjust_exit_policy_from_exitpolicy_failure(origin_circuit_t *circ, node_t *node, const tor_addr_t *addr); -/** Stop reading on edge connections when we have this many cells - * waiting on the appropriate queue. */ -#define CELL_QUEUE_HIGHWATER_SIZE 256 -/** Start reading from edge connections again when we get down to this many - * cells. */ -#define CELL_QUEUE_LOWWATER_SIZE 64 - /** Stats: how many relay cells have originated at this hop, or have * been relayed onward (not recognized at this hop)? */ @@ -338,8 +333,17 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ, } return 0; } - log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, - "Didn't recognize cell, but circ stops here! Closing circ."); + if (BUG(CIRCUIT_IS_ORIGIN(circ))) { + /* Should be impossible at this point. 
*/ + return -END_CIRC_REASON_TORPROTOCOL; + } + or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); + if (++or_circ->n_cells_discarded_at_end == 1) { + time_t seconds_open = approx_time() - circ->timestamp_created.tv_sec; + log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL, + "Didn't recognize a cell, but circ stops here! Closing circuit. " + "It was created %ld seconds ago.", (long)seconds_open); + } return -END_CIRC_REASON_TORPROTOCOL; } @@ -1574,6 +1578,7 @@ process_sendme_cell(const relay_header_t *rh, const cell_t *cell, } /* Stream level SENDME cell. */ + // TODO: Turn this off for cc_alg=1,2,3; use XON/XOFF instead ret = sendme_process_stream_level(conn, circ, rh->length); if (ret < 0) { /* Means we need to close the circuit with reason ret. */ @@ -1738,6 +1743,44 @@ handle_relay_cell_command(cell_t *cell, circuit_t *circ, } return 0; + case RELAY_COMMAND_XOFF: + if (!conn) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (relay_crypt_from_last_hop(ocirc, layer_hint) && + connection_half_edge_is_valid_data(ocirc->half_streams, + rh->stream_id)) { + circuit_read_valid_data(ocirc, rh->length); + } + } + return 0; + } + + if (circuit_process_stream_xoff(conn, layer_hint, cell)) { + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh->length); + } + } + return 0; + case RELAY_COMMAND_XON: + if (!conn) { + if (CIRCUIT_IS_ORIGIN(circ)) { + origin_circuit_t *ocirc = TO_ORIGIN_CIRCUIT(circ); + if (relay_crypt_from_last_hop(ocirc, layer_hint) && + connection_half_edge_is_valid_data(ocirc->half_streams, + rh->stream_id)) { + circuit_read_valid_data(ocirc, rh->length); + } + } + return 0; + } + + if (circuit_process_stream_xon(conn, layer_hint, cell)) { + if (CIRCUIT_IS_ORIGIN(circ)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh->length); + } + } + return 0; case RELAY_COMMAND_END: reason = rh->length > 0 ? 
get_uint8(cell->payload+RELAY_HEADER_SIZE) : END_STREAM_REASON_MISC; @@ -2091,6 +2134,7 @@ void circuit_reset_sendme_randomness(circuit_t *circ) { circ->have_sent_sufficiently_random_cell = 0; + // XXX: do we need to change this check for congestion control? circ->send_randomness_after_n_cells = CIRCWINDOW_INCREMENT / 2 + crypto_fast_rng_get_uint(get_thread_fast_rng(), CIRCWINDOW_INCREMENT / 2); } @@ -2284,7 +2328,7 @@ connection_edge_package_raw_inbuf(edge_connection_t *conn, int package_partial, } /* Handle the stream-level SENDME package window. */ - if (sendme_note_stream_data_packaged(conn) < 0) { + if (sendme_note_stream_data_packaged(conn, length) < 0) { connection_stop_reading(TO_CONN(conn)); log_debug(domain,"conn->package_window reached 0."); circuit_consider_stop_edge_reading(circ, cpath_layer); @@ -2350,15 +2394,16 @@ circuit_resume_edge_reading_helper(edge_connection_t *first_conn, /* How many cells do we have space for? It will be the minimum of * the number needed to exhaust the package window, and the minimum * needed to fill the cell queue. */ - max_to_package = circ->package_window; + + max_to_package = congestion_control_get_package_window(circ, layer_hint); if (CIRCUIT_IS_ORIGIN(circ)) { cells_on_queue = circ->n_chan_cells.n; } else { or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); cells_on_queue = or_circ->p_chan_cells.n; } - if (CELL_QUEUE_HIGHWATER_SIZE - cells_on_queue < max_to_package) - max_to_package = CELL_QUEUE_HIGHWATER_SIZE - cells_on_queue; + if (cell_queue_highwatermark() - cells_on_queue < max_to_package) + max_to_package = cell_queue_highwatermark() - cells_on_queue; /* Once we used to start listening on the streams in the order they * appeared in the linked list. 
That leads to starvation on the @@ -2398,7 +2443,8 @@ circuit_resume_edge_reading_helper(edge_connection_t *first_conn, /* Activate reading starting from the chosen stream */ for (conn=chosen_stream; conn; conn = conn->next_stream) { /* Start reading for the streams starting from here */ - if (conn->base_.marked_for_close || conn->package_window <= 0) + if (conn->base_.marked_for_close || conn->package_window <= 0 || + conn->xoff_received) continue; if (!layer_hint || conn->cpath_layer == layer_hint) { connection_start_reading(TO_CONN(conn)); @@ -2409,7 +2455,8 @@ circuit_resume_edge_reading_helper(edge_connection_t *first_conn, } /* Go back and do the ones we skipped, circular-style */ for (conn = first_conn; conn != chosen_stream; conn = conn->next_stream) { - if (conn->base_.marked_for_close || conn->package_window <= 0) + if (conn->base_.marked_for_close || conn->package_window <= 0 || + conn->xoff_received) continue; if (!layer_hint || conn->cpath_layer == layer_hint) { connection_start_reading(TO_CONN(conn)); @@ -2495,7 +2542,7 @@ circuit_consider_stop_edge_reading(circuit_t *circ, crypt_path_t *layer_hint) or_circuit_t *or_circ = TO_OR_CIRCUIT(circ); log_debug(domain,"considering circ->package_window %d", circ->package_window); - if (circ->package_window <= 0) { + if (congestion_control_get_package_window(circ, layer_hint) <= 0) { log_debug(domain,"yes, not-at-origin. stopped."); for (conn = or_circ->n_streams; conn; conn=conn->next_stream) connection_stop_reading(TO_CONN(conn)); @@ -2506,7 +2553,7 @@ circuit_consider_stop_edge_reading(circuit_t *circ, crypt_path_t *layer_hint) /* else, layer hint is defined, use it */ log_debug(domain,"considering layer_hint->package_window %d", layer_hint->package_window); - if (layer_hint->package_window <= 0) { + if (congestion_control_get_package_window(circ, layer_hint) <= 0) { log_debug(domain,"yes, at-origin. 
stopped."); for (conn = TO_ORIGIN_CIRCUIT(circ)->p_streams; conn; conn=conn->next_stream) { @@ -2722,11 +2769,18 @@ cell_queues_get_total_allocation(void) /** The time at which we were last low on memory. */ static time_t last_time_under_memory_pressure = 0; +/** Statistics on how many bytes were removed by the OOM per type. */ +uint64_t oom_stats_n_bytes_removed_dns = 0; +uint64_t oom_stats_n_bytes_removed_cell = 0; +uint64_t oom_stats_n_bytes_removed_geoip = 0; +uint64_t oom_stats_n_bytes_removed_hsdir = 0; + /** Check whether we've got too much space used for cells. If so, * call the OOM handler and return 1. Otherwise, return 0. */ STATIC int cell_queues_check_size(void) { + size_t removed = 0; time_t now = time(NULL); size_t alloc = cell_queues_get_total_allocation(); alloc += half_streams_get_total_allocation(); @@ -2751,20 +2805,27 @@ cell_queues_check_size(void) if (hs_cache_total > get_options()->MaxMemInQueues / 5) { const size_t bytes_to_remove = hs_cache_total - (size_t)(get_options()->MaxMemInQueues / 10); - alloc -= hs_cache_handle_oom(now, bytes_to_remove); + removed = hs_cache_handle_oom(now, bytes_to_remove); + oom_stats_n_bytes_removed_hsdir += removed; + alloc -= removed; } if (geoip_client_cache_total > get_options()->MaxMemInQueues / 5) { const size_t bytes_to_remove = geoip_client_cache_total - (size_t)(get_options()->MaxMemInQueues / 10); - alloc -= geoip_client_cache_handle_oom(now, bytes_to_remove); + removed = geoip_client_cache_handle_oom(now, bytes_to_remove); + oom_stats_n_bytes_removed_geoip += removed; + alloc -= removed; } if (dns_cache_total > get_options()->MaxMemInQueues / 5) { const size_t bytes_to_remove = dns_cache_total - (size_t)(get_options()->MaxMemInQueues / 10); - alloc -= dns_cache_handle_oom(now, bytes_to_remove); + removed = dns_cache_handle_oom(now, bytes_to_remove); + oom_stats_n_bytes_removed_dns += removed; + alloc -= removed; } - circuits_handle_oom(alloc); + removed = circuits_handle_oom(alloc); + 
oom_stats_n_bytes_removed_cell += removed; return 1; } } @@ -3062,7 +3123,7 @@ channel_flush_from_first_active_circuit, (channel_t *chan, int max)) /* Is the cell queue low enough to unblock all the streams that are waiting * to write to this circuit? */ - if (streams_blocked && queue->n <= CELL_QUEUE_LOWWATER_SIZE) + if (streams_blocked && queue->n <= cell_queue_lowwatermark()) set_streams_blocked_on_circ(circ, chan, 0, 0); /* unblock streams */ /* If n_flushed < max still, loop around and pick another circuit */ @@ -3180,7 +3241,7 @@ append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan, /* If we have too many cells on the circuit, we should stop reading from * the edge streams for a while. */ - if (!streams_blocked && queue->n >= CELL_QUEUE_HIGHWATER_SIZE) + if (!streams_blocked && queue->n >= cell_queue_highwatermark()) set_streams_blocked_on_circ(circ, chan, 1, 0); /* block streams */ if (streams_blocked && fromstream) { diff --git a/src/core/or/relay.h b/src/core/or/relay.h index 2f337d5d16..eac920f491 100644 --- a/src/core/or/relay.h +++ b/src/core/or/relay.h @@ -49,6 +49,11 @@ extern uint64_t stats_n_data_bytes_packaged; extern uint64_t stats_n_data_cells_received; extern uint64_t stats_n_data_bytes_received; +extern uint64_t oom_stats_n_bytes_removed_dns; +extern uint64_t oom_stats_n_bytes_removed_cell; +extern uint64_t oom_stats_n_bytes_removed_geoip; +extern uint64_t oom_stats_n_bytes_removed_hsdir; + void dump_cell_pool_usage(int severity); size_t packed_cell_mem_cost(void); diff --git a/src/core/or/sendme.c b/src/core/or/sendme.c index ce3385ae98..ee670f9d51 100644 --- a/src/core/or/sendme.c +++ b/src/core/or/sendme.c @@ -21,6 +21,8 @@ #include "core/or/or_circuit_st.h" #include "core/or/relay.h" #include "core/or/sendme.h" +#include "core/or/congestion_control_common.h" +#include "core/or/congestion_control_flow.h" #include "feature/nodelist/networkstatus.h" #include "lib/ctime/di_ops.h" #include "trunnel/sendme_cell.h" @@ -64,13 +66,6 @@ 
pop_first_cell_digest(const circuit_t *circ) return NULL; } - /* More cell digest than the SENDME window is never suppose to happen. The - * cell should have been rejected before reaching this point due to its - * package_window down to 0 leading to a circuit close. Scream loudly but - * still pop the element so we don't memory leak. */ - tor_assert_nonfatal(smartlist_len(circ->sendme_last_digests) <= - CIRCWINDOW_START_MAX / CIRCWINDOW_INCREMENT); - circ_digest = smartlist_get(circ->sendme_last_digests, 0); smartlist_del_keeporder(circ->sendme_last_digests, 0); return circ_digest; @@ -334,17 +329,18 @@ record_cell_digest_on_circ(circuit_t *circ, const uint8_t *sendme_digest) /** Return true iff the next cell for the given cell window is expected to be * a SENDME. * - * We are able to know that because the package or deliver window value minus - * one cell (the possible SENDME cell) should be a multiple of the increment - * window value. */ + * We are able to know that because the package or inflight window value minus + * one cell (the possible SENDME cell) should be a multiple of the + * cells-per-sendme increment value (set via consensus parameter, negotiated + * for the circuit, and passed in as sendme_inc). + * + * This function is used when recording a cell digest and this is done quite + * low in the stack when decrypting or encrypting a cell. The window is only + * updated once the cell is actually put in the outbuf. + */ static bool -circuit_sendme_cell_is_next(int window) +circuit_sendme_cell_is_next(int window, int sendme_inc) { - /* At the start of the window, no SENDME will be expected. */ - if (window == CIRCWINDOW_START) { - return false; - } - /* Are we at the limit of the increment and if not, we don't expect next * cell is a SENDME. 
* @@ -352,11 +348,8 @@ circuit_sendme_cell_is_next(int window) * next cell is a SENDME, the window (either package or deliver) hasn't been * decremented just yet so when this is called, we are currently processing * the "window - 1" cell. - * - * This function is used when recording a cell digest and this is done quite - * low in the stack when decrypting or encrypting a cell. The window is only - * updated once the cell is actually put in the outbuf. */ - if (((window - 1) % CIRCWINDOW_INCREMENT) != 0) { + */ + if (((window - 1) % sendme_inc) != 0) { return false; } @@ -378,6 +371,10 @@ sendme_connection_edge_consider_sending(edge_connection_t *conn) int log_domain = TO_CONN(conn)->type == CONN_TYPE_AP ? LD_APP : LD_EXIT; + /* If we use flow control, we do not send stream sendmes */ + if (edge_uses_flow_control(conn)) + goto end; + /* Don't send it if we still have data to deliver. */ if (connection_outbuf_too_full(TO_CONN(conn))) { goto end; @@ -419,15 +416,16 @@ sendme_circuit_consider_sending(circuit_t *circ, crypt_path_t *layer_hint) { bool sent_one_sendme = false; const uint8_t *digest; + int sendme_inc = sendme_get_inc_count(circ, layer_hint); while ((layer_hint ? layer_hint->deliver_window : circ->deliver_window) <= - CIRCWINDOW_START - CIRCWINDOW_INCREMENT) { + CIRCWINDOW_START - sendme_inc) { log_debug(LD_CIRC,"Queuing circuit sendme."); if (layer_hint) { - layer_hint->deliver_window += CIRCWINDOW_INCREMENT; + layer_hint->deliver_window += sendme_inc; digest = cpath_get_sendme_digest(layer_hint); } else { - circ->deliver_window += CIRCWINDOW_INCREMENT; + circ->deliver_window += sendme_inc; digest = relay_crypto_get_sendme_digest(&TO_OR_CIRCUIT(circ)->crypto); } if (send_circuit_level_sendme(circ, layer_hint, digest) < 0) { @@ -448,6 +446,9 @@ sendme_circuit_consider_sending(circuit_t *circ, crypt_path_t *layer_hint) * the length of the SENDME cell payload (excluding the header). The * cell_payload is the payload. 
* + * This function validates the SENDME's digest, and then dispatches to + * the appropriate congestion control algorithm in use on the circuit. + * * Return 0 on success (the SENDME is valid and the package window has * been updated properly). * @@ -460,6 +461,7 @@ sendme_process_circuit_level(crypt_path_t *layer_hint, { tor_assert(circ); tor_assert(cell_payload); + congestion_control_t *cc; /* Validate the SENDME cell. Depending on the version, different validation * can be done. An invalid SENDME requires us to close the circuit. */ @@ -467,6 +469,34 @@ sendme_process_circuit_level(crypt_path_t *layer_hint, return -END_CIRC_REASON_TORPROTOCOL; } + // Get CC + if (layer_hint) { + cc = layer_hint->ccontrol; + + /* origin circuits need to count valid sendmes as valid protocol data */ + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), cell_payload_len); + } else { + cc = circ->ccontrol; + } + + /* If there is no CC object, assume fixed alg */ + if (!cc) { + return sendme_process_circuit_level_impl(layer_hint, circ); + } + + return congestion_control_dispatch_cc_alg(cc, circ, layer_hint); +} + +/** + * Process a SENDME for Tor's original fixed window circuit-level flow control. + * Updates the package_window and ensures that it does not exceed the max. + * + * Returns -END_CIRC_REASON_TORPROTOCOL if the max is exceeded, otherwise + * returns 0. + */ +int +sendme_process_circuit_level_impl(crypt_path_t *layer_hint, circuit_t *circ) +{ /* If we are the origin of the circuit, we are the Client so we use the * layer hint (the Exit hop) for the package window tracking. */ if (CIRCUIT_IS_ORIGIN(circ)) { @@ -486,10 +516,6 @@ sendme_process_circuit_level(crypt_path_t *layer_hint, layer_hint->package_window += CIRCWINDOW_INCREMENT; log_debug(LD_APP, "circ-level sendme at origin, packagewindow %d.", layer_hint->package_window); - - /* We count circuit-level sendme's as valid delivered data because they - * are rate limited. 
*/ - circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), cell_payload_len); } else { /* We aren't the origin of this circuit so we are the Exit and thus we * track the package window with the circuit object. */ @@ -525,6 +551,12 @@ sendme_process_stream_level(edge_connection_t *conn, circuit_t *circ, tor_assert(conn); tor_assert(circ); + if (edge_uses_flow_control(conn)) { + log_fn(LOG_PROTOCOL_WARN, LD_EDGE, + "Congestion control got stream sendme"); + return -END_CIRC_REASON_TORPROTOCOL; + } + /* Don't allow the other endpoint to request more than our maximum (i.e. * initial) stream SENDME window worth of data. Well-behaved stock clients * will not request more than this max (as per the check in the while loop @@ -582,7 +614,12 @@ int sendme_stream_data_received(edge_connection_t *conn) { tor_assert(conn); - return --conn->deliver_window; + + if (edge_uses_flow_control(conn)) { + return flow_control_decide_xoff(conn); + } else { + return --conn->deliver_window; + } } /* Called when a relay DATA cell is packaged on the given circuit. If @@ -592,34 +629,56 @@ int sendme_note_circuit_data_packaged(circuit_t *circ, crypt_path_t *layer_hint) { int package_window, domain; + congestion_control_t *cc; tor_assert(circ); - if (CIRCUIT_IS_ORIGIN(circ)) { - /* Client side. */ - tor_assert(layer_hint); - --layer_hint->package_window; - package_window = layer_hint->package_window; + if (layer_hint) { + cc = layer_hint->ccontrol; domain = LD_APP; } else { - /* Exit side. */ - tor_assert(!layer_hint); - --circ->package_window; - package_window = circ->package_window; + cc = circ->ccontrol; domain = LD_EXIT; } - log_debug(domain, "Circuit package_window now %d.", package_window); - return package_window; + if (cc) { + congestion_control_note_cell_sent(cc, circ, layer_hint); + } else { + /* Fixed alg uses package_window and must update it */ + + if (CIRCUIT_IS_ORIGIN(circ)) { + /* Client side. 
*/ + tor_assert(layer_hint); + --layer_hint->package_window; + package_window = layer_hint->package_window; + } else { + /* Exit side. */ + tor_assert(!layer_hint); + --circ->package_window; + package_window = circ->package_window; + } + log_debug(domain, "Circuit package_window now %d.", package_window); + } + + /* Return appropriate number designating how many cells can still be sent */ + return congestion_control_get_package_window(circ, layer_hint); } /* Called when a relay DATA cell is packaged for the given edge connection * conn. Update the package window and return its new value. */ int -sendme_note_stream_data_packaged(edge_connection_t *conn) +sendme_note_stream_data_packaged(edge_connection_t *conn, size_t len) { tor_assert(conn); + if (edge_uses_flow_control(conn)) { + flow_control_note_sent_data(conn, len); + if (conn->xoff_received) + return -1; + else + return 1; + } + --conn->package_window; log_debug(LD_APP, "Stream package_window now %d.", conn->package_window); return conn->package_window; @@ -631,20 +690,14 @@ sendme_note_stream_data_packaged(edge_connection_t *conn) void sendme_record_cell_digest_on_circ(circuit_t *circ, crypt_path_t *cpath) { - int package_window; uint8_t *sendme_digest; tor_assert(circ); - package_window = circ->package_window; - if (cpath) { - package_window = cpath->package_window; - } - /* Is this the last cell before a SENDME? The idea is that if the * package_window reaches a multiple of the increment, after this cell, we * should expect a SENDME. */ - if (!circuit_sendme_cell_is_next(package_window)) { + if (!circuit_sent_cell_for_sendme(circ, cpath)) { return; } @@ -670,7 +723,8 @@ sendme_record_received_cell_digest(circuit_t *circ, crypt_path_t *cpath) /* Only record if the next cell is expected to be a SENDME. */ if (!circuit_sendme_cell_is_next(cpath ? 
cpath->deliver_window : - circ->deliver_window)) { + circ->deliver_window, + sendme_get_inc_count(circ, cpath))) { return; } @@ -692,8 +746,7 @@ sendme_record_sending_cell_digest(circuit_t *circ, crypt_path_t *cpath) tor_assert(circ); /* Only record if the next cell is expected to be a SENDME. */ - if (!circuit_sendme_cell_is_next(cpath ? cpath->package_window : - circ->package_window)) { + if (!circuit_sent_cell_for_sendme(circ, cpath)) { goto end; } diff --git a/src/core/or/sendme.h b/src/core/or/sendme.h index a008940905..2abec91a91 100644 --- a/src/core/or/sendme.h +++ b/src/core/or/sendme.h @@ -22,6 +22,7 @@ void sendme_circuit_consider_sending(circuit_t *circ, int sendme_process_circuit_level(crypt_path_t *layer_hint, circuit_t *circ, const uint8_t *cell_payload, uint16_t cell_payload_len); +int sendme_process_circuit_level_impl(crypt_path_t *, circuit_t *); int sendme_process_stream_level(edge_connection_t *conn, circuit_t *circ, uint16_t cell_body_len); @@ -32,7 +33,7 @@ int sendme_circuit_data_received(circuit_t *circ, crypt_path_t *layer_hint); /* Update package window functions. */ int sendme_note_circuit_data_packaged(circuit_t *circ, crypt_path_t *layer_hint); -int sendme_note_stream_data_packaged(edge_connection_t *conn); +int sendme_note_stream_data_packaged(edge_connection_t *conn, size_t len); /* Record cell digest on circuit. */ void sendme_record_cell_digest_on_circ(circuit_t *circ, crypt_path_t *cpath); diff --git a/src/core/or/status.c b/src/core/or/status.c index 9e7ae70535..1e599aafb3 100644 --- a/src/core/or/status.c +++ b/src/core/or/status.c @@ -147,6 +147,32 @@ note_connection(bool inbound, int family) } } +/** + * @name Counters for unrecognized cells + * + * Track cells that we drop because they are unrecognized and we have + * nobody to send them to. 
+ **/ +/**@{*/ +static unsigned n_circs_closed_for_unrecognized_cells; +static uint64_t n_unrecognized_cells_discarded; +static uint64_t n_secs_on_circs_with_unrecognized_cells; +/**@}*/ + +/** + * Note that a circuit has closed @a n_seconds after having been created, + * because of one or more unrecognized cells. Also note the number of + * unrecognized cells @a n_cells. + */ +void +note_circ_closed_for_unrecognized_cells(time_t n_seconds, uint32_t n_cells) +{ + ++n_circs_closed_for_unrecognized_cells; + n_unrecognized_cells_discarded += n_cells; + if (n_seconds >= 0) + n_secs_on_circs_with_unrecognized_cells += (uint64_t) n_seconds; +} + /** Log a "heartbeat" message describing Tor's status and history so that the * user can know that there is indeed a running Tor. Return 0 on success and * -1 on failure. */ @@ -240,6 +266,23 @@ log_heartbeat(time_t now) (main_loop_idle_count)); } + if (n_circs_closed_for_unrecognized_cells) { + double avg_time_alive = ((double) n_secs_on_circs_with_unrecognized_cells) + / n_circs_closed_for_unrecognized_cells; + double avg_cells = ((double) n_unrecognized_cells_discarded) + / n_circs_closed_for_unrecognized_cells; + log_fn(LOG_NOTICE, LD_HEARTBEAT, + "Since our last heartbeat, %u circuits were closed because of " + "unrecognized cells while we were the last hop. 
On average, each " + "one was alive for %lf seconds, and had %lf unrecognized cells.", + n_circs_closed_for_unrecognized_cells, + avg_time_alive, + avg_cells); + n_circs_closed_for_unrecognized_cells = 0; + n_unrecognized_cells_discarded = 0; + n_secs_on_circs_with_unrecognized_cells = 0; + } + /** Now, if we are an HS service, log some stats about our usage */ log_onion_service_stats(); diff --git a/src/core/or/status.h b/src/core/or/status.h index 927df9a192..57e28002fc 100644 --- a/src/core/or/status.h +++ b/src/core/or/status.h @@ -12,6 +12,9 @@ #include "lib/testsupport/testsupport.h" void note_connection(bool inbound, int family); +void note_circ_closed_for_unrecognized_cells(time_t n_seconds, + uint32_t n_cells); + int log_heartbeat(time_t now); #ifdef STATUS_PRIVATE diff --git a/src/core/or/trace_probes_cc.c b/src/core/or/trace_probes_cc.c new file mode 100644 index 0000000000..d52646da4f --- /dev/null +++ b/src/core/or/trace_probes_cc.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file trace_probes_cc.c + * \brief Tracepoint provider source file for the cc subsystem. Probes + * are generated within this C file for LTTng-UST + **/ + +#include "orconfig.h" + +/* + * Following section is specific to LTTng-UST. + */ +#ifdef USE_TRACING_INSTRUMENTATION_LTTNG + +/* Header files that the probes need. 
*/ +#include "core/or/or.h" +#include "core/or/channel.h" +#include "core/or/circuit_st.h" +#include "core/or/circuitlist.h" +#include "core/or/congestion_control_st.h" +#include "core/or/connection_st.h" +#include "core/or/edge_connection_st.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" + +#define TRACEPOINT_DEFINE +#define TRACEPOINT_CREATE_PROBES + +#include "core/or/trace_probes_cc.h" + +#endif /* defined(USE_TRACING_INSTRUMENTATION_LTTNG) */ diff --git a/src/core/or/trace_probes_cc.h b/src/core/or/trace_probes_cc.h new file mode 100644 index 0000000000..1f87528723 --- /dev/null +++ b/src/core/or/trace_probes_cc.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2021, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file trace_probes_cc.h + * \brief The tracing probes for the congestion control subsystem. + * Currently, only LTTng-UST probes are available. + **/ + +#ifndef TOR_TRACE_PROBES_CC_H +#define TOR_TRACE_PROBES_CC_H + +#include "lib/trace/events.h" + +/* We only build the following if LTTng instrumentation has been enabled. */ +#ifdef USE_TRACING_INSTRUMENTATION_LTTNG + +#include "core/or/lttng_cc.inc" + +#endif /* USE_TRACING_INSTRUMENTATION_LTTNG */ + +#endif /* !defined(TOR_TRACE_PROBES_CC_H) */ |