Merge remote-tracking branch 'asn-github/adaptive_padding-final'

author: Nick Mathewson <nickm@torproject.org> 2019-01-14 14:48:00 -0500
committer: Nick Mathewson <nickm@torproject.org> 2019-01-14 14:48:00 -0500
commit: b169c8c14f23394b40305f38ee4ce08add278e27 (patch)
tree: 0649da16a97792103773f9d5cedbfd75deac49bd
parent: 691dec5d4615dec9a845d0f7dea7ef55cc66fe62 (diff)
parent: b269ab5aaeee65a3a0b1e5e0923d9dc7898c232e (diff)
download: tor-b169c8c14f23394b40305f38ee4ce08add278e27.tar.gz
tor-b169c8c14f23394b40305f38ee4ce08add278e27.zip
53 files changed, 10102 insertions, 40 deletions
diff --git a/doc/tor.1.txt b/doc/tor.1.txt
index b058bebcb3..92355dfb54 100644
--- a/doc/tor.1.txt
+++ b/doc/tor.1.txt
@@ -1021,6 +1021,26 @@ The following options are useful only for clients (that is, if
     The .exit address notation, if enabled via MapAddress, overrides
     this option.
 
+[[MiddleNodes]] **MiddleNodes** __node__,__node__,__...__::
+    A list of identity fingerprints and country codes of nodes
+    to use for "middle" hops in your normal circuits.
+    Normal circuits include all circuits except for direct connections
+    to directory servers. Middle hops are all hops other than exit and entry. +
++
+    This is an **experimental** feature that is meant to be used by researchers
+    and developers to test new features in the Tor network safely. Using it
+    without care will strongly influence your anonymity. This feature might get
+    removed in the future.
++
+    The HSLayer2Nodes and HSLayer3Nodes options override this option for onion
+    service circuits, if they are set. The vanguards addon will read this
+    option, and if set, it will set HSLayer2Nodes and HSLayer3Nodes to nodes
+    from this set.
++
+    The ExcludeNodes option overrides this option: any node listed in both
+    MiddleNodes and ExcludeNodes is treated as excluded. See
+    the **ExcludeNodes** option for more information on how to specify nodes.
+
 [[EntryNodes]] **EntryNodes** __node__,__node__,__...__::
     A list of identity fingerprints and country codes of nodes
     to use for the first hop in your normal circuits.
@@ -1037,13 +1057,14 @@ The following options are useful only for clients (that is, if
     If StrictNodes is set to 1, Tor will treat solely the ExcludeNodes option
     as a requirement to follow for all the circuits you generate, even if
     doing so will break functionality for you (StrictNodes applies to neither
-    ExcludeExitNodes nor to ExitNodes).  If StrictNodes is set to 0, Tor will
-    still try to avoid nodes in the ExcludeNodes list, but it will err on the
-    side of avoiding unexpected errors.  Specifically, StrictNodes 0 tells Tor
-    that it is okay to use an excluded node when it is *necessary* to perform
-    relay reachability self-tests, connect to a hidden service, provide a
-    hidden service to a client, fulfill a .exit request, upload directory
-    information, or download directory information.  (Default: 0)
+    ExcludeExitNodes nor to ExitNodes, nor to MiddleNodes).  If StrictNodes
+    is set to 0, Tor will still try to avoid nodes in the ExcludeNodes list,
+    but it will err on the side of avoiding unexpected errors.
+    Specifically, StrictNodes 0 tells Tor that it is okay to use an excluded
+    node when it is *necessary* to perform relay reachability self-tests,
+    connect to a hidden service, provide a hidden service to a client,
+    fulfill a .exit request, upload directory information, or download
+    directory information.  (Default: 0)
 
 [[FascistFirewall]] **FascistFirewall** **0**|**1**::
     If 1, Tor will only create outgoing connections to ORs running on ports
diff --git a/src/app/config/config.c b/src/app/config/config.c
index 22070c346b..ecf4c21545 100644
--- a/src/app/config/config.c
+++ b/src/app/config/config.c
@@ -421,6 +421,10 @@ static config_var_t option_vars_[] = {
   V(ExcludeExitNodes,            ROUTERSET, NULL),
   OBSOLETE("ExcludeSingleHopRelays"),
   V(ExitNodes,                   ROUTERSET, NULL),
+  /* Researchers need a way to tell their clients to use specific
+   * middles that they also control, to allow safe live-network
+   * experimentation with new padding machines. */
+  V(MiddleNodes,                 ROUTERSET, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(ExitPolicyRejectLocalInterfaces, BOOL, "0"),
@@ -1693,6 +1697,7 @@ options_need_geoip_info(const or_options_t *options, const char **reason_out)
   int routerset_usage =
     routerset_needs_geoip(options->EntryNodes) ||
     routerset_needs_geoip(options->ExitNodes) ||
+    routerset_needs_geoip(options->MiddleNodes) ||
     routerset_needs_geoip(options->ExcludeExitNodes) ||
     routerset_needs_geoip(options->ExcludeNodes) ||
     routerset_needs_geoip(options->HSLayer2Nodes) ||
@@ -2132,6 +2137,7 @@ options_act(const or_options_t *old_options)
                          options->HSLayer2Nodes) ||
         !routerset_equal(old_options->HSLayer3Nodes,
                          options->HSLayer3Nodes) ||
+        !routerset_equal(old_options->MiddleNodes, options->MiddleNodes) ||
         options->StrictNodes != old_options->StrictNodes) {
       log_info(LD_CIRC,
                "Changed to using entry guards or bridges, or changed "
diff --git a/src/app/config/or_options_st.h b/src/app/config/or_options_st.h
index c2bc1079a5..63a17c9771 100644
--- a/src/app/config/or_options_st.h
+++ b/src/app/config/or_options_st.h
@@ -72,6 +72,9 @@ struct or_options_t {
   routerset_t *ExitNodes; /**< Structure containing nicknames, digests,
                            * country codes and IP address patterns of ORs to
                            * consider as exits. */
+  routerset_t *MiddleNodes; /**< Structure containing nicknames, digests,
+                             * country codes and IP address patterns of ORs to
+                             * consider as middles. */
   routerset_t *EntryNodes;/**< Structure containing nicknames, digests,
                            * country codes and IP address patterns of ORs to
                            * consider as entry points. */
diff --git a/src/app/main/main.c b/src/app/main/main.c
index d71e43ec30..ba2dfebd77 100644
--- a/src/app/main/main.c
+++ b/src/app/main/main.c
@@ -22,6 +22,7 @@
 #include "core/mainloop/netstatus.h"
 #include "core/or/channel.h"
 #include "core/or/channelpadding.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/channeltls.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitmux_ewma.h"
@@ -645,9 +646,13 @@ tor_init(int argc, char *argv[])
   /* The options are now initialised */
   const or_options_t *options = get_options();
 
-  /* Initialize channelpadding parameters to defaults until we get
-   * a consensus */
+  /* Initialize channelpadding and circpad parameters to defaults
+   * until we get a consensus */
   channelpadding_new_consensus_params(NULL);
+  circpad_new_consensus_params(NULL);
+
+  /* Initialize circuit padding to defaults+torrc until we get a consensus */
+  circpad_machines_init();
 
   /* Initialize predicted ports list after loading options */
   predicted_ports_init();
@@ -766,6 +771,7 @@ tor_free_all(int postfork)
   dns_free_all();
   clear_pending_onions();
   circuit_free_all();
+  circpad_machines_free();
   entry_guards_free_all();
   pt_free_all();
   channel_tls_free_all();
diff --git a/src/core/include.am b/src/core/include.am
index 5e69cb9ada..ae47c75e09 100644
--- a/src/core/include.am
+++ b/src/core/include.am
@@ -32,6 +32,7 @@ LIBTOR_APP_A_SOURCES = 				\
 	src/core/or/circuitlist.c		\
 	src/core/or/circuitmux.c		\
 	src/core/or/circuitmux_ewma.c		\
+	src/core/or/circuitpadding.c		\
 	src/core/or/circuitstats.c		\
 	src/core/or/circuituse.c		\
 	src/core/or/command.c			\
@@ -227,6 +228,7 @@ noinst_HEADERS +=					\
 	src/core/or/circuitmux.h			\
 	src/core/or/circuitmux_ewma.h			\
 	src/core/or/circuitstats.h			\
+	src/core/or/circuitpadding.h			\
 	src/core/or/circuituse.h			\
 	src/core/or/command.h				\
 	src/core/or/connection_edge.h			\
diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h
index 2e33b37b01..29bcaa098f 100644
--- a/src/core/or/circuit_st.h
+++ b/src/core/or/circuit_st.h
@@ -12,6 +12,11 @@
 #include "core/or/cell_queue_st.h"
 
 struct hs_token_t;
+struct circpad_machine_spec_t;
+struct circpad_machine_state_t;
+
+/** Maximum number of padding state machines on a circuit. */
+#define CIRCPAD_MAX_MACHINES (2)
 
 /** "magic" value for an origin_circuit_t */
 #define ORIGIN_CIRCUIT_MAGIC 0x35315243u
@@ -177,6 +182,27 @@ struct circuit_t {
   /** Hashtable node: used to look up the circuit by its HS token using the HS
       circuitmap. */
   HT_ENTRY(circuit_t) hs_circuitmap_node;
+
+  /** Adaptive Padding state machines: these are immutable. The state machines
+   *  that come from the consensus are saved to a global structure, to avoid
+   *  per-circuit allocations. This merely points to the global copy in
+   *  origin_padding_machines or relay_padding_machines that should never
+   *  change or get deallocated.
+   *
+   *  Each element of this array corresponds to a different padding machine,
+   *  and we can have up to CIRCPAD_MAX_MACHINES such machines. */
+  const struct circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES];
+
+  /** Adaptive Padding machine info for above machines. This is the
+   *  per-circuit mutable information, such as the current state and
+   *  histogram token counts. Some of it is optional (aka NULL).
+   *  If a machine is being shut down, these indexes can be NULL
+   *  without the corresponding padding_machine being NULL, while we
+   *  wait for the other end to respond to our shutdown request.
+   *
+   *  Each element of this array corresponds to a different padding machine,
+   *  and we can have up to CIRCPAD_MAX_MACHINES such machines. */
+  struct circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES];
 };
 
 #endif
diff --git a/src/core/or/circuitbuild.c b/src/core/or/circuitbuild.c
index b89ec09a99..22e4cf96d8 100644
--- a/src/core/or/circuitbuild.c
+++ b/src/core/or/circuitbuild.c
@@ -43,6 +43,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/command.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_or.h"
@@ -950,12 +951,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
   crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath);
   circuit_build_times_handle_completed_hop(circ);
 
+  circpad_machine_event_circ_added_hop(circ);
+
   if (hop) {
     /* Case two: we're on a hop after the first. */
     return circuit_send_intermediate_onion_skin(circ, hop);
   }
 
   /* Case three: the circuit is finished. Do housekeeping tasks on it. */
+  circpad_machine_event_circ_built(circ);
   return circuit_build_no_more_hops(circ);
 }
 
@@ -2606,7 +2610,24 @@ choose_good_middle_server(uint8_t purpose,
     return choice;
   }
 
-  choice = router_choose_random_node(excluded, options->ExcludeNodes, flags);
+  if (options->MiddleNodes) {
+    smartlist_t *sl = smartlist_new();
+    routerset_get_all_nodes(sl, options->MiddleNodes,
+                            options->ExcludeNodes, 1);
+
+    smartlist_subtract(sl, excluded);
+
+    choice = node_sl_choose_by_bandwidth(sl, WEIGHT_FOR_MID);
+    smartlist_free(sl);
+    if (choice) {
+      log_fn(LOG_INFO, LD_CIRC, "Chose fixed middle node: %s",
+          hex_str(choice->identity, DIGEST_LEN));
+    } else {
+      log_fn(LOG_NOTICE, LD_CIRC, "Restricted middle not available");
+    }
+  } else {
+    choice = router_choose_random_node(excluded, options->ExcludeNodes, flags);
+  }
   smartlist_free(excluded);
   return choice;
 }
diff --git a/src/core/or/circuitlist.c b/src/core/or/circuitlist.c
index c4b5f7ee3e..71f8becddc 100644
--- a/src/core/or/circuitlist.c
+++ b/src/core/or/circuitlist.c
@@ -62,6 +62,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuitstats.h"
+#include "core/or/circuitpadding.h"
 #include "core/mainloop/connection.h"
 #include "app/config/config.h"
 #include "core/or/connection_edge.h"
@@ -1231,6 +1232,9 @@ circuit_free_(circuit_t *circ)
            CIRCUIT_IS_ORIGIN(circ) ?
               TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0);
 
+  /* Free any circuit padding structures */
+  circpad_circuit_free_all_machineinfos(circ);
+
   if (should_free) {
     memwipe(mem, 0xAA, memlen); /* poison memory */
     tor_free(mem);
diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c
new file mode 100644
index 0000000000..0dadc52139
--- /dev/null
+++ b/src/core/or/circuitpadding.c
@@ -0,0 +1,2562 @@
+/* Copyright (c) 2017 The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file circuitpadding.c
+ * \brief Circuit-level padding implementation
+ *
+ * \details
+ *
+ * This file implements Tor proposal 254 "Padding Negotiation" which is heavily
+ * inspired by the paper "Toward an Efficient Website Fingerprinting Defense"
+ * by M. Juarez, M. Imani, M. Perry, C. Diaz, M. Wright.
+ *
+ * In particular the code in this file describes mechanisms for clients to
+ * negotiate various types of circuit-level padding from relays.
+ *
+ * Each padding type is described by a state machine (circpad_machine_spec_t),
+ * which is also referred to as a "padding machine" in this file.  Currently,
+ * these state machines are hardcoded in the source code (e.g. see
+ * circpad_circ_client_machine_init()), but in the future we will be able to
+ * serialize them in the torrc or the consensus.
+ *
+ * As specified by prop#254, clients can negotiate padding with relays by using
+ * PADDING_NEGOTIATE cells. After successful padding negotiation, padding
+ * machines are assigned to the circuit in their mutable form as a
+ * circpad_machine_state_t.
+ *
+ * Each state of a padding state machine can be either:
+ * - A histogram that specifies inter-arrival padding delays.
+ * - Or a parametrized probability distribution that specifies inter-arrival
+ *   delays (see circpad_distribution_type_t).
+ *
+ * Padding machines start from the START state and finish with the END
+ * state. They can transition between states using the events in
+ * circpad_event_t.
+ *
+ * When a padding machine reaches the END state, it gets wiped from the circuit
+ * so that other padding machines can take over if needed (see
+ * circpad_machine_spec_transitioned_to_end()).
+ **/
+
+#define CIRCUITPADDING_PRIVATE
+
+#include <math.h>
+#include "lib/math/fp.h"
+#include "lib/math/prob_distr.h"
+#include "core/or/or.h"
+#include "core/or/circuitpadding.h"
+#include "core/or/circuitlist.h"
+#include "core/or/circuituse.h"
+#include "core/or/relay.h"
+#include "feature/stats/rephist.h"
+#include "feature/nodelist/networkstatus.h"
+
+#include "core/or/channel.h"
+
+#include "lib/time/compat_time.h"
+#include "lib/defs/time.h"
+#include "lib/crypt_ops/crypto_rand.h"
+
+#include "core/or/crypt_path_st.h"
+#include "core/or/circuit_st.h"
+#include "core/or/origin_circuit_st.h"
+#include "feature/nodelist/routerstatus_st.h"
+#include "feature/nodelist/node_st.h"
+#include "core/or/cell_st.h"
+#include "core/or/extend_info_st.h"
+#include "core/crypto/relay_crypto.h"
+#include "feature/nodelist/nodelist.h"
+
+#include "app/config/config.h"
+
+static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t
+                                          circ_purpose);
+static inline circpad_circuit_state_t circpad_circuit_state(
+                                        origin_circuit_t *circ);
+static void circpad_setup_machine_on_circ(circuit_t *on_circ,
+                                        const circpad_machine_spec_t *machine);
+static double circpad_distribution_sample(circpad_distribution_t dist);
+
+/** Cached consensus params */
+static uint8_t circpad_global_max_padding_percent;
+static uint16_t circpad_global_allowed_cells;
+
+/** Global cell counts, for rate limiting */
+static uint64_t circpad_global_padding_sent;
+static uint64_t circpad_global_nonpadding_sent;
+
+/** This is the list of circpad_machine_spec_t's parsed from consensus and
+ *  torrc that have origin_side == 1 (ie: are for client side).
+ *
+ *  The machines in this smartlist are considered immutable and they are used
+ *  as-is by circuits so they should not change or get deallocated in Tor's
+ *  runtime and as long as circuits are alive. */
+STATIC smartlist_t *origin_padding_machines = NULL;
+
+/** This is the list of circpad_machine_spec_t's parsed from consensus and
+ *  torrc that have origin_side == 0 (ie: are for relay side).
+ *
+ *  The machines in this smartlist are considered immutable and they are used
+ *  as-is by circuits so they should not change or get deallocated in Tor's
+ *  runtime and as long as circuits are alive. */
+STATIC smartlist_t *relay_padding_machines = NULL;
+
+/** Loop over every padding machine slot of a circuit, using <b>loop_var</b>
+ *  as the loop variable. <b>loop_var</b> is declared by the macro as an int
+ *  and runs from 0 up to (but not including) CIRCPAD_MAX_MACHINES.
+ *
+ *  The loop body must be closed with FOR_EACH_CIRCUIT_MACHINE_END. */
+#define FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var)                         \
+  STMT_BEGIN                                                             \
+  for (int loop_var = 0; loop_var < CIRCPAD_MAX_MACHINES; loop_var++) {
+#define FOR_EACH_CIRCUIT_MACHINE_END } STMT_END ;
+
+/** Loop over the machine slots of <b>circ</b> that are currently active,
+ *  using <b>loop_var</b> as the loop variable. A slot counts as active when
+ *  circ->padding_info[loop_var] is non-NULL; inactive slots are skipped.
+ *
+ *  The loop body must be closed with FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END. */
+#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(loop_var, circ)            \
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var)                               \
+  if (!(circ)->padding_info[loop_var])                           \
+    continue;
+#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END } STMT_END ;
+
+/**
+ * Map a circuit padding state number to a short, static, human-readable
+ * name.
+ *
+ * The START, BURST, GAP and END states map to their own names; every other
+ * state number is reported as "CUSTOM".
+ */
+static const char *
+circpad_state_to_string(circpad_statenum_t state)
+{
+  switch (state) {
+  case CIRCPAD_STATE_START:
+    return "START";
+  case CIRCPAD_STATE_BURST:
+    return "BURST";
+  case CIRCPAD_STATE_GAP:
+    return "GAP";
+  case CIRCPAD_STATE_END:
+    return "END";
+  default:
+    /* XXX: Just return # in static char buf? */
+    return "CUSTOM";
+  }
+}
+
+/**
+ * Release the mutable machine info stored in slot <b>idx</b> of
+ * <b>circ</b>, together with the histogram and padding timer it owns.
+ *
+ * Does nothing if the slot is empty. <b>idx</b> is not range-checked here.
+ */
+static void
+circpad_circuit_machineinfo_free_idx(circuit_t *circ, int idx)
+{
+  circpad_machine_state_t *info = circ->padding_info[idx];
+
+  if (!info)
+    return;
+
+  tor_free(info->histogram);
+  timer_free(info->padding_timer);
+  /* Free through the circuit's own slot rather than the local alias, so
+   * the free macro operates on the pointer stored in the circuit. */
+  tor_free(circ->padding_info[idx]);
+}
+
+/** Detach from <b>circ</b> every padding machine whose machine_num equals
+ *  <b>machine_num</b>: free its mutable machine info and clear the slot's
+ *  pointer to the machine spec. */
+static void
+free_circ_machineinfos_with_machine_num(circuit_t *circ, int machine_num)
+{
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(slot) {
+    const circpad_machine_spec_t *spec = circ->padding_machine[slot];
+
+    /* Skip empty slots and machines with a different number. */
+    if (!spec || spec->machine_num != machine_num)
+      continue;
+
+    circpad_circuit_machineinfo_free_idx(circ, slot);
+    circ->padding_machine[slot] = NULL;
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Free the mutable padding machine info in every machine slot of
+ * <b>circ</b>.
+ *
+ * Only the per-circuit padding_info entries are released; the
+ * padding_machine pointers are left untouched.
+ */
+void
+circpad_circuit_free_all_machineinfos(circuit_t *circ)
+{
+  for (int idx = 0; idx < CIRCPAD_MAX_MACHINES; idx++) {
+    circpad_circuit_machineinfo_free_idx(circ, idx);
+  }
+}
+
+/**
+ * Allocate a zero-filled mutable machine info structure, and record the
+ * circuit <b>on_circ</b> and the slot <b>machine_index</b> it belongs to.
+ *
+ * The new structure is returned to the caller; it is not stored in
+ * <b>on_circ</b> by this function.
+ */
+STATIC circpad_machine_state_t *
+circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index)
+{
+  circpad_machine_state_t *info = tor_malloc_zero(sizeof(*info));
+
+  info->on_circ = on_circ;
+  info->machine_index = machine_index;
+  return info;
+}
+
+/**
+ * Look up the immutable circpad_state_t that <b>mi</b> is currently in.
+ *
+ * Returns NULL if the machine is in the end state. Also returns NULL (after
+ * triggering BUG() and logging a warning) if the current state number is
+ * not smaller than the machine's num_states.
+ */
+STATIC const circpad_state_t *
+circpad_machine_current_state(const circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  /* The end state has no state data to return. */
+  if (mi->current_state == CIRCPAD_STATE_END)
+    return NULL;
+
+  if (BUG(mi->current_state >= machine->num_states)) {
+    log_fn(LOG_WARN,LD_CIRC,
+           "Invalid circuit padding state %d",
+           mi->current_state);
+    return NULL;
+  }
+
+  return &machine->states[mi->current_state];
+}
+
+/**
+ * Calculate the lower bound of a histogram bin. The upper bound
+ * is obtained by calling this function with bin+1, and subtracting 1.
+ *
+ * The 0th bin has a special value -- it only represents start_usec.
+ * This is so we can specify a probability on 0-delay values.
+ *
+ * After bin 0, bins are exponentially spaced, so that each subsequent
+ * bin is twice as large as the previous. This is done so that higher
+ * time resolution is given to lower time values.
+ *
+ * The infinity bin is the last bin in the array (histogram_len-1).
+ * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX).
+ *
+ * If the machine has no current state, BUG() is triggered and
+ * CIRCPAD_DELAY_INFINITE is returned.
+ */
+STATIC circpad_delay_t
+circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (bin >= CIRCPAD_INFINITY_BIN(state))
+    return CIRCPAD_DELAY_INFINITE;
+
+  if (bin == 0)
+    return start_usec;
+
+  if (bin == 1)
+    return start_usec+1;
+
+  /* The bin widths double every index, so that we can have more resolution
+   * for lower time values in the histogram: the lower bound of this bin is
+   * range_usec halved once per bin between it and the infinity bin. */
+  const unsigned int halvings =
+    (unsigned int)(CIRCPAD_INFINITY_BIN(state) - bin);
+
+  /* Shifting by the full width of the type (or more) is undefined in C.
+   * Mathematically the quotient is zero by then, so the bin starts at
+   * start_usec. */
+  if (halvings >= sizeof(circpad_time_t) * 8)
+    return start_usec;
+
+  /* Do the shift in circpad_time_t, not int: a plain "1 << n" overflows
+   * int as soon as n reaches 31. */
+  const circpad_time_t bin_width_divisor = ((circpad_time_t)1) << halvings;
+  return (circpad_delay_t)MIN(start_usec +
+                              state->range_usec/bin_width_divisor,
+                              CIRCPAD_DELAY_INFINITE);
+}
+
+/** Return the delay halfway between the lower bound of histogram bin
+ *  <b>bin_index</b> and its upper bound, where the upper bound is taken as
+ *  the lower bound of the next bin minus one. */
+static circpad_delay_t
+circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi,
+                           int bin_index)
+{
+  const circpad_delay_t lo = circpad_histogram_bin_to_usec(mi, bin_index);
+  const circpad_delay_t hi =
+    circpad_histogram_bin_to_usec(mi, bin_index+1)-1;
+
+  /* Written as lo + half-width rather than (lo+hi)/2. */
+  return lo + (hi - lo)/2;
+}
+
+/**
+ * Return the bin that contains the usec argument.
+ * "Contains" is defined as us in [lower, upper).
+ *
+ * This function will never return the infinity bin (histogram_len-1),
+ * in order to simplify the rest of the code.
+ *
+ * This means that technically the last bin (histogram_len-2)
+ * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE].
+ *
+ * If the machine has no current state, BUG() is triggered and bin 0 is
+ * returned.
+ */
+STATIC circpad_hist_index_t
+circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi,
+                              circpad_delay_t usec)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+  int32_t bin; /* Larger than return type to properly clamp overflow */
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return 0;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  /* The first bin (#0) has zero width and starts (and ends) at start_usec. */
+  if (usec <= start_usec)
+    return 0;
+
+  if (usec == start_usec+1)
+    return 1;
+
+  const circpad_time_t histogram_range_usec = state->range_usec;
+
+  /* Position of usec within the range, plus one. Widen before adding the
+   * one: in circpad_delay_t arithmetic "usec-start_usec+1" wraps around to
+   * zero when the difference is the largest representable delay, and the
+   * division below would then divide by zero. */
+  const circpad_time_t offset_usec =
+    (circpad_time_t)(usec - start_usec) + 1;
+
+  /* We need to find the bin corresponding to our position in the range.
+   * Since bins are exponentially spaced in powers of two, we need to
+   * take the log2 of our position in histogram_range_usec. However,
+   * since tor_log2() returns the floor(log2(u64)), we have to adjust
+   * it to behave like ceil(log2(u64)). This is verified in our tests
+   * to properly invert the operation done in
+   * circpad_histogram_bin_to_usec(). */
+  bin = CIRCPAD_INFINITY_BIN(state) -
+    tor_log2(2*histogram_range_usec/offset_usec);
+
+  /* Clamp the return value to account for timevals before the start
+   * of bin 0, or after the last bin. Don't return the infinity bin
+   * index. */
+  bin = MIN(MAX(bin, 1), CIRCPAD_INFINITY_BIN(state)-1);
+  return bin;
+}
+
+/**
+ * (Re)initialize the mutable token histogram of <b>mi</b> for its current
+ * state.
+ *
+ * If there is no current state, or the state does not use token removal,
+ * any existing mutable histogram is freed. Otherwise the mutable histogram
+ * is (re)allocated if needed and filled with a copy of the state's
+ * histogram.
+ *
+ * Called after a state transition, or if the bins are empty.
+ */
+STATIC void
+circpad_machine_setup_tokens(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  const int uses_tokens =
+    state && state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE;
+
+  if (!uses_tokens) {
+    /* Nothing to track in this state: drop any leftover histogram. */
+    if (mi->histogram) {
+      tor_free(mi->histogram);
+      mi->histogram = NULL;
+      mi->histogram_len = 0;
+    }
+    return;
+  }
+
+  const size_t histogram_bytes =
+    sizeof(circpad_hist_token_t)*state->histogram_len;
+
+  /* Only reallocate when there is no buffer yet, or its length differs. */
+  if (!mi->histogram || mi->histogram_len != state->histogram_len) {
+    tor_free(mi->histogram); // null ok
+    mi->histogram = tor_malloc_zero(histogram_bytes);
+  }
+  mi->histogram_len = state->histogram_len;
+
+  memcpy(mi->histogram, state->histogram, histogram_bytes);
+}
+
+/**
+ * Pick how long (in cells) the machine stays in its current state and
+ * store the result in mi->state_length.
+ *
+ * If there is no current state, or the state has no length distribution,
+ * the length is CIRCPAD_STATE_LENGTH_INFINITE. Otherwise a sample from the
+ * length distribution is floored at zero, offset by start_length and
+ * capped at max_length.
+ */
+static void
+circpad_choose_state_length(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+
+  if (state && state->length_dist.type != CIRCPAD_DIST_NONE) {
+    double sampled = circpad_distribution_sample(state->length_dist);
+
+    sampled = MAX(0, sampled);
+    sampled += state->start_length;
+    sampled = MIN(sampled, state->max_length);
+
+    mi->state_length = clamp_double_to_int64(sampled);
+    return;
+  }
+
+  mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+}
+
+/**
+ * Draw an inter-arrival delay from the iat_dist of <b>state</b>.
+ *
+ * The raw sample is clamped to [0, range_usec], offset by
+ * <b>start_usec</b>, rounded, and capped at CIRCPAD_DELAY_INFINITE so that
+ * it fits a circpad_delay_t.
+ */
+static circpad_delay_t
+circpad_distribution_sample_iat_delay(const circpad_state_t *state,
+                                      circpad_delay_t start_usec)
+{
+  double delay = circpad_distribution_sample(state->iat_dist);
+
+  /* Comparing in double is safe here: the values involved are in
+   * [0, 2**32), well within the 53 bits of precision of a double. */
+  delay = MAX(0, delay);
+  delay = MIN(delay, state->range_usec);
+
+  /* Likewise exact: both terms are at most 2**32-1. */
+  delay += start_usec;
+
+  /* Never report more than the "infinite" delay value. */
+  return (circpad_delay_t)MIN(tor_llround(delay), CIRCPAD_DELAY_INFINITE);
+}
+
+/**
+ * Sample an expected time-until-next-packet delay for the current state.
+ *
+ * If the state has an IAT distribution, the delay is sampled from it.
+ * Otherwise it is sampled from the histogram (the mutable per-circuit copy
+ * when the state uses token removal, the immutable one otherwise):
+ * the bin is chosen with probability proportional to the number
+ * of tokens in each bin, and then a time value is chosen uniformly from
+ * that bin's [start,end) time range.
+ *
+ * Returns CIRCPAD_DELAY_INFINITE when no padding should be scheduled: the
+ * infinity bin was chosen, the histogram has no tokens left, or a sanity
+ * check failed. Choosing the infinity bin of a mutable histogram also
+ * removes one token from it.
+ */
+STATIC circpad_delay_t
+circpad_machine_sample_delay(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  const circpad_hist_token_t *histogram = NULL;
+  circpad_hist_index_t curr_bin = 0;
+  circpad_delay_t bin_start, bin_end;
+  circpad_delay_t start_usec;
+  /* These three must all be larger than circpad_hist_token_t, because
+   * we sum several circpad_hist_token_t values across the histogram */
+  uint64_t curr_weight = 0;
+  uint64_t histogram_total_tokens = 0;
+  uint64_t bin_choice;
+
+  tor_assert(state);
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (state->iat_dist.type != CIRCPAD_DIST_NONE) {
+    /* Sample from a fixed IAT distribution and return */
+    return circpad_distribution_sample_iat_delay(state, start_usec);
+  } else if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    /* We have a mutable histogram. Do basic sanity check and apply: */
+    if (BUG(!mi->histogram) ||
+        BUG(mi->histogram_len != state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+
+    histogram = mi->histogram;
+    for (circpad_hist_index_t b = 0; b < state->histogram_len; b++)
+      histogram_total_tokens += histogram[b];
+  } else {
+    /* We have a histogram, but it's immutable */
+    histogram = state->histogram;
+    histogram_total_tokens = state->histogram_total_tokens;
+  }
+
+  /* If we are out of tokens there is nothing to sample from, and the
+   * random number generator must not be asked for a value below 0.
+   * Don't schedule padding. */
+  if (histogram_total_tokens == 0) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  /* bin_choice is uniform over [0, histogram_total_tokens). */
+  bin_choice = crypto_rand_uint64(histogram_total_tokens);
+
+  /* Walk the cumulative token counts and stop at the first bin where the
+   * running total exceeds bin_choice. Using "exceeds" (rather than
+   * "reaches") gives a bin holding k tokens exactly k of the possible
+   * bin_choice values, and can never stop on an empty bin.
+   *
+   * TODO: This is not constant-time. Pretty sure we don't
+   * really need it to be, though. */
+  for (;;) {
+    /* It should be impossible to run past the end of the histogram */
+    if (BUG(curr_bin >= state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+    curr_weight += histogram[curr_bin];
+    if (curr_weight > bin_choice)
+      break;
+    curr_bin++;
+  }
+
+  /* Do some basic checking of the current bin we are in */
+  if (BUG(curr_bin >= state->histogram_len) ||
+      BUG(histogram[curr_bin] == 0)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  // Store this index to remove the token upon callback.
+  if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    mi->chosen_bin = curr_bin;
+  }
+
+  if (curr_bin >= CIRCPAD_INFINITY_BIN(state)) {
+    if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE &&
+        mi->histogram[curr_bin] > 0) {
+      mi->histogram[curr_bin]--;
+    }
+
+    // Infinity: Don't send a padding packet. Wait for a real packet
+    // and then see if our bins are empty or what else we should do.
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  tor_assert(curr_bin < CIRCPAD_INFINITY_BIN(state));
+
+  bin_start = circpad_histogram_bin_to_usec(mi, curr_bin);
+  /* We don't need to subtract 1 from the upper bound because the random
+   * range function below samples from [bin_start, bin_end) */
+  bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1);
+
+  /* Truncate the high bin in case it's the infinity bin:
+   * Don't actually schedule an "infinite"-1 delay */
+  bin_end = MIN(bin_end, start_usec+state->range_usec);
+
+  // Sample uniformly between histogram[i] to histogram[i+1]-1,
+  // but no need to sample if they are the same timeval (aka bin 0 or bin 1).
+  if (bin_end <= bin_start+1)
+    return bin_start;
+  else
+    return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end);
+}
+
+/**
+ * Sample a value from the specified probability distribution.
+ *
+ * The type of <b>dist</b> selects the distribution; its param1 and param2
+ * fields are copied into the matching distribution structure, which is then
+ * handed to dist_sample(). The meaning of each parameter is noted in the
+ * individual cases below.
+ *
+ * Returns 0 (after a nonfatal assertion) for CIRCPAD_DIST_NONE or for a
+ * type not handled by the switch.
+ *
+ * This performs inverse transform sampling
+ * (https://en.wikipedia.org/wiki/Inverse_transform_sampling).
+ * NOTE(review): the sampling itself happens inside dist_sample(), which is
+ * not visible here -- confirm the statement above against that code.
+ *
+ * XXX: These formulas were taken verbatim. Need a floating wizard
+ * to check them for catastrophic cancellation and other issues (teor?).
+ * Also: is 32bits of double from [0.0,1.0) enough?
+ */
+static double
+circpad_distribution_sample(circpad_distribution_t dist)
+{
+  log_fn(LOG_DEBUG,LD_CIRC, "Sampling delay with distribution %d",
+         dist.type);
+
+  switch (dist.type) {
+    case CIRCPAD_DIST_NONE:
+      {
+        /* We should not get in here like this: callers are expected to
+         * check for CIRCPAD_DIST_NONE before sampling. */
+        tor_assert_nonfatal_unreached();
+        return 0;
+      }
+    case CIRCPAD_DIST_UNIFORM:
+      {
+        // param1 is the lower bound, param2 is the upper bound
+        const struct uniform my_uniform = {
+          .base = UNIFORM(my_uniform),
+          .a = dist.param1,
+          .b = dist.param2,
+        };
+        return dist_sample(&my_uniform.base);
+      }
+    case CIRCPAD_DIST_LOGISTIC:
+      {
+      /* param1 is Mu, param2 is sigma. */
+        const struct logistic my_logistic = {
+          .base = LOGISTIC(my_logistic),
+          .mu = dist.param1,
+          .sigma = dist.param2,
+        };
+        return dist_sample(&my_logistic.base);
+      }
+    case CIRCPAD_DIST_LOG_LOGISTIC:
+      {
+        /* param1 is Alpha, param2 is 1.0/Beta.
+         * NOTE(review): param2 is stored unchanged in .beta -- confirm
+         * whether callers pass Beta or 1.0/Beta. */
+        const struct log_logistic my_log_logistic = {
+          .base = LOG_LOGISTIC(my_log_logistic),
+          .alpha = dist.param1,
+          .beta = dist.param2,
+        };
+        return dist_sample(&my_log_logistic.base);
+      }
+    case CIRCPAD_DIST_GEOMETRIC:
+      {
+        /* param1 is 'p' (success probability); param2 is not used */
+        const struct geometric my_geometric = {
+          .base = GEOMETRIC(my_geometric),
+          .p = dist.param1,
+        };
+        return dist_sample(&my_geometric.base);
+      }
+    case CIRCPAD_DIST_WEIBULL:
+      {
+        /* param1 is k, param2 is Lambda */
+        const struct weibull my_weibull = {
+          .base = WEIBULL(my_weibull),
+          .k = dist.param1,
+          .lambda = dist.param2,
+        };
+        return dist_sample(&my_weibull.base);
+      }
+    case CIRCPAD_DIST_PARETO:
+      {
+        /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */
+        const struct genpareto my_genpareto = {
+          .base = GENPARETO(my_genpareto),
+          .mu = 0,
+          .sigma = dist.param1,
+          .xi = dist.param2,
+        };
+        return dist_sample(&my_genpareto.base);
+      }
+  }
+
+  tor_assert_nonfatal_unreached();
+  return 0;
+}
+
+/**
+ * Scan up from the target's bin for the first non-empty, non-infinity bin
+ * with upper bound > <b>target_bin_usec</b>; histogram_len if there is none.
+ */
+static circpad_hist_index_t
+circpad_machine_first_higher_index(const circpad_machine_state_t *mi,
+                                   circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t idx;
+
+  /* The loop bound keeps us from ever selecting the infinity bin */
+  for (idx = circpad_histogram_usec_to_bin(mi, target_bin_usec);
+       idx < CIRCPAD_INFINITY_BIN(mi); idx++) {
+    if (!mi->histogram[idx])
+      continue; /* no tokens left here */
+    if (circpad_histogram_bin_to_usec(mi, idx+1) > target_bin_usec)
+      return idx;
+  }
+
+  return mi->histogram_len;
+}
+
+/**
+ * Scan down from the target's bin for the first non-empty bin whose lower
+ * bound is <= <b>target_bin_usec</b>. Returns that index, or -1 if none.
+ */
+static circpad_hist_index_t
+circpad_machine_first_lower_index(const circpad_machine_state_t *mi,
+                                  circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t idx;
+
+  for (idx = circpad_histogram_usec_to_bin(mi, target_bin_usec);
+       idx >= 0; idx--) {
+    if (!mi->histogram[idx])
+      continue; /* no tokens left here */
+    if (circpad_histogram_bin_to_usec(mi, idx) <= target_bin_usec)
+      return idx;
+  }
+
+  return -1;
+}
+
+/**
+ * Take one token out of the first bin above <b>target_bin_usec</b> that
+ * still has any, as located by circpad_machine_first_higher_index().
+ */
+STATIC void
+circpad_machine_remove_higher_token(circpad_machine_state_t *mi,
+                                    circpad_delay_t target_bin_usec)
+{
+  const circpad_hist_index_t bin =
+    circpad_machine_first_higher_index(mi, target_bin_usec);
+
+  /* No usable bin below the infinity bin: nothing to remove. */
+  if (bin < 0 || bin >= CIRCPAD_INFINITY_BIN(mi))
+    return;
+
+  /* The lookup only hands back non-empty bins; anything else is a bug. */
+  if (BUG(mi->histogram[bin] == 0))
+    return;
+  mi->histogram[bin]--;
+}
+
+/**
+ * Take one token out of the first non-empty bin at or below the target,
+ * as located by circpad_machine_first_lower_index().
+ */
+STATIC void
+circpad_machine_remove_lower_token(circpad_machine_state_t *mi,
+                                   circpad_delay_t target_bin_usec)
+{
+  const circpad_hist_index_t bin =
+    circpad_machine_first_lower_index(mi, target_bin_usec);
+
+  if (bin < 0 || bin >= CIRCPAD_INFINITY_BIN(mi))
+    return; /* -1 means every candidate bin was empty */
+  if (BUG(mi->histogram[bin] == 0))
+    return;
+  mi->histogram[bin]--;
+}
+
+/* Helper macro: BUG out of the enclosing void function if <b>bin_index</b>
+ * has no tokens left. Expects a local named <b>mi</b> to be in scope. */
+#define ENSURE_BIN_CAPACITY(bin_index)                        \
+  do {                                                        \
+    if (BUG(mi->histogram[bin_index] == 0)) return;           \
+  } while (0)
+
+/**
+ * Remove a token from the non-empty bin closest to <b>target_bin_usec</b>.
+ *
+ * If <b>use_usec</b> is true, "closest" is judged by comparing the target
+ * against the midpoints of the candidate bins; ties go to the lower bin.
+ *
+ * If it is false, use bin index distance only (ties also go lower).
+ */
+STATIC void
+circpad_machine_remove_closest_token(circpad_machine_state_t *mi,
+                                     circpad_delay_t target_bin_usec,
+                                     bool use_usec)
+{
+  circpad_hist_index_t lower, higher, current;
+  circpad_hist_index_t bin_to_remove = -1;
+
+  lower = circpad_machine_first_lower_index(mi, target_bin_usec);
+  higher = circpad_machine_first_higher_index(mi, target_bin_usec);
+  current = circpad_histogram_usec_to_bin(mi, target_bin_usec);
+
+  /* Sanity check: the two candidates must bracket the target's own bin */
+  if (BUG(lower > current) || BUG(higher < current)) {
+    return;
+  }
+
+  /* Take care of edge cases first */
+  if (higher == mi->histogram_len && lower == -1) {
+    /* Neither lookup found a bin with tokens: nothing to remove */
+    return;
+  } else if (higher == mi->histogram_len) {
+    /* All higher bins are empty: the lower candidate is the only choice */
+    ENSURE_BIN_CAPACITY(lower);
+    mi->histogram[lower]--;
+    return;
+  } else if (lower == -1) {
+    /* All lower bins are empty: the higher candidate is the only choice */
+    ENSURE_BIN_CAPACITY(higher);
+    mi->histogram[higher]--;
+    return;
+  }
+
+  /* Now handle the intermediate cases */
+  if (use_usec) {
+    /* Find the closest bin midpoint to the target */
+    circpad_delay_t lower_usec = circpad_get_histogram_bin_midpoint(mi, lower);
+    circpad_delay_t higher_usec =
+      circpad_get_histogram_bin_midpoint(mi, higher);
+
+    if (target_bin_usec < lower_usec) {
+      // Target lies below the lower midpoint: lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    } else if (target_bin_usec > higher_usec) {
+      // Target lies above the higher midpoint: higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else if (target_bin_usec-lower_usec > higher_usec-target_bin_usec) {
+      // Between the midpoints, and strictly nearer to the higher one
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else {
+      // Between the midpoints, and nearer to (or tied with) the lower one
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    }
+    mi->histogram[bin_to_remove]--;
+    log_debug(LD_GENERAL, "Removing token from bin %d", bin_to_remove);
+    return;
+  } else {
+    if (current - lower > higher - current) {
+      // Higher bin is strictly fewer bins away
+      ENSURE_BIN_CAPACITY(higher);
+      mi->histogram[higher]--;
+      return;
+    } else {
+      // Lower bin is as near or nearer by index
+      ENSURE_BIN_CAPACITY(lower);
+      mi->histogram[lower]--;
+      return;
+    }
+  }
+}
+
+#undef ENSURE_BIN_CAPACITY
+
+/**
+ * Remove a token from exactly the bin that <b>target_bin_usec</b> maps to.
+ * An empty bin is left untouched; no other bin is tried.
+ */
+static void
+circpad_machine_remove_exact(circpad_machine_state_t *mi,
+                             circpad_delay_t target_bin_usec)
+{
+  const circpad_hist_index_t idx =
+    circpad_histogram_usec_to_bin(mi, target_bin_usec);
+
+  if (mi->histogram[idx] > 0) {
+    mi->histogram[idx]--;
+  }
+}
+
+/**
+ * Inspect the machine's remaining histogram tokens and its state length.
+ *
+ * Fires the bins-empty and state-length-up internal events as needed and
+ * reports whether one of them made the machine change state.
+ */
+static circpad_decision_t
+check_machine_token_supply(circpad_machine_state_t *mi)
+{
+  uint32_t tokens_left = 0;
+
+  /* Are the bins empty? The machineinfo keeps no running token total, so
+   * we add up the mutable histogram each time; the loop is short enough
+   * that caching a count was judged not worth the extra space.
+   *
+   * The infinity bin is deliberately left out of the sum, and machines
+   * without a histogram skip this check entirely.
+   */
+  if (mi->histogram_len && mi->histogram) {
+    circpad_hist_index_t b;
+    for (b = 0; b < CIRCPAD_INFINITY_BIN(mi); b++)
+      tokens_left += mi->histogram[b];
+
+    /* Empty bins: raise the event, and stop here if it moved us */
+    if (tokens_left == 0 &&
+        circpad_internal_event_bins_empty(mi) == CIRCPAD_STATE_CHANGED)
+      return CIRCPAD_STATE_CHANGED;
+  }
+
+  if (mi->state_length != 0)
+    return CIRCPAD_STATE_UNCHANGED;
+
+  /* State length used up: let the machine react to that */
+  return circpad_internal_event_state_length_up(mi);
+}
+
+/**
+ * Called for a non-padding cell: remove a token from the bin matching the
+ * time elapsed since padding was scheduled, following the current state's
+ * removal strategy, and cancel any pending padding timer.
+ *
+ * Also bumps the non-padding counter used for rate limiting and, where the
+ * state asks for it, decrements the state length.
+ *
+ * Returns check_machine_token_supply()'s verdict (UNCHANGED on early exit).
+ */
+STATIC circpad_decision_t
+circpad_machine_remove_token(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = NULL;
+  circpad_time_t current_time;
+  circpad_delay_t target_bin_usec;
+
+  /* Update non-padding counts for rate limiting: We scale at UINT16_MAX
+   * because we only use this for a percentile limit of 2 sig figs, and
+   * space is scarce in the machineinfo struct. */
+  mi->nonpadding_sent++;
+  if (mi->nonpadding_sent == UINT16_MAX) {
+    mi->padding_sent /= 2;
+    mi->nonpadding_sent /= 2;
+  }
+
+  /* Don't remove any tokens if there was no padding scheduled */
+  if (!mi->padding_scheduled_at_usec) {
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  state = circpad_machine_current_state(mi);
+  current_time = monotime_absolute_usec();
+
+  /* Padding was scheduled at some earlier point: the time elapsed since
+     then (clamped just below "infinite") picks the bin we are in now */
+  target_bin_usec = (circpad_delay_t)
+                  MIN((current_time - mi->padding_scheduled_at_usec),
+                      CIRCPAD_DELAY_INFINITE-1);
+
+  /* We are treating this non-padding cell as a padding cell, so we cancel
+     padding timer, if present. */
+  mi->padding_scheduled_at_usec = 0;
+  if (mi->is_padding_timer_scheduled) {
+    mi->is_padding_timer_scheduled = 0;
+    timer_disable(mi->padding_timer);
+  }
+
+  /* If we are not in a padding state (like start or end), we're done */
+  if (!state)
+    return CIRCPAD_STATE_UNCHANGED;
+
+  /* If we're enforcing a state length on non-padding packets,
+   * decrement it (never below zero) */
+  if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE &&
+      state->length_includes_nonpadding &&
+      mi->state_length > 0) {
+    mi->state_length--;
+  }
+
+  /* Perform the specified token removal strategy */
+  switch (state->token_removal) {
+    case CIRCPAD_TOKEN_REMOVAL_NONE:
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC:
+      circpad_machine_remove_closest_token(mi, target_bin_usec, 1);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_CLOSEST:
+      circpad_machine_remove_closest_token(mi, target_bin_usec, 0);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_LOWER:
+      circpad_machine_remove_lower_token(mi, target_bin_usec);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_HIGHER:
+      circpad_machine_remove_higher_token(mi, target_bin_usec);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_EXACT:
+      circpad_machine_remove_exact(mi, target_bin_usec);
+      break;
+  }
+
+  /* Check our token and state length limits */
+  return check_machine_token_supply(mi);
+}
+
+/**
+ * Send <b>relay_command</b>, carrying <b>payload</b> as its relay cell
+ * payload, along <b>circ</b> to the hop numbered <b>hopnum</b>.
+ *
+ * Hops are counted from 1 (1=guard, 2=middle, 3=exit, etc).
+ * A null payload is allowed.
+ *
+ * Returns -1 if the hop is missing or not open; otherwise whatever
+ * relay_send_command_from_edge() returns.
+ */
+MOCK_IMPL(STATIC signed_error_t,
+circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum,
+                             uint8_t relay_command, const uint8_t *payload,
+                             ssize_t payload_len))
+{
+  crypt_path_t *target_hop = circuit_get_cpath_hop(circ, hopnum);
+
+  /* The circuit has to be long enough to contain the hop at all */
+  if (target_hop == NULL) {
+    log_fn(LOG_WARN, LD_BUG, "Padding circuit %u has %d hops, not %d",
+           circ->global_identifier, circuit_get_cpath_len(circ), hopnum);
+    return -1;
+  }
+
+  /* ...and that hop has to have finished opening */
+  if (target_hop->state != CPATH_STATE_OPEN) {
+    log_fn(LOG_WARN,LD_CIRC,
+           "Padding circuit %u has %d hops, not %d",
+           circ->global_identifier,
+           circuit_get_cpath_opened_len(circ), hopnum);
+    return -1;
+  }
+
+  /* Hand the cell to the relay layer, addressed to the chosen hop, and
+   * pass its verdict straight back to the caller. The command and the
+   * hop are whatever the caller asked for. */
+  return relay_send_command_from_edge(0, TO_CIRCUIT(circ), relay_command,
+                                      (const char*)payload, payload_len,
+                                      target_hop);
+}
+
+/**
+ * Callback helper to send a padding cell.
+ *
+ * This helper is called after our histogram-sampled delay period passes
+ * without another packet being sent first (a packet sent earlier cancels the
+ * callback), or directly when the sampled delay is zero. Either way, send
+ * padding right away.
+ *
+ * Returns CIRCPAD_STATE_CHANGED if the machine changed state or went away,
+ * or if we bailed out early; otherwise check_machine_token_supply()'s result.
+ */
+circpad_decision_t
+circpad_send_padding_cell_for_callback(circpad_machine_state_t *mi)
+{
+  circuit_t *circ = mi->on_circ;
+  int machine_idx = mi->machine_index;
+  mi->padding_scheduled_at_usec = 0;
+  circpad_statenum_t state = mi->current_state;
+
+  // Make sure circuit didn't close on us
+  if (mi->on_circ->marked_for_close) {
+    log_fn(LOG_INFO,LD_CIRC,
+           "Padding callback on a circuit marked for close. Ignoring.");
+    return CIRCPAD_STATE_CHANGED;
+  }
+
+  /* If this machine uses a histogram, take a token from the chosen bin */
+  if (mi->histogram && mi->histogram_len) {
+    /* Basic sanity check on the histogram before removing anything */
+    if (BUG(mi->chosen_bin >= mi->histogram_len) ||
+        BUG(mi->histogram[mi->chosen_bin] == 0)) {
+      return CIRCPAD_STATE_CHANGED;
+    }
+
+    mi->histogram[mi->chosen_bin]--;
+  }
+
+  /* If we have a valid state length bound, consider it */
+  if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE &&
+      !BUG(mi->state_length <= 0)) {
+    mi->state_length--;
+  }
+
+  /*
+   * Update padding counts for rate limiting: We scale at UINT16_MAX
+   * because we only use this for a percentile limit of 2 sig figs, and
+   * space is scarce in the machineinfo struct.
+   */
+  mi->padding_sent++;
+  if (mi->padding_sent == UINT16_MAX) {
+    mi->padding_sent /= 2;
+    mi->nonpadding_sent /= 2;
+  }
+  circpad_global_padding_sent++;
+
+  if (CIRCUIT_IS_ORIGIN(mi->on_circ)) {
+    circpad_send_command_to_hop(TO_ORIGIN_CIRCUIT(mi->on_circ),
+                                CIRCPAD_GET_MACHINE(mi)->target_hopnum,
+                                RELAY_COMMAND_DROP, NULL, 0);
+    log_fn(LOG_INFO,LD_CIRC, "Callback: Sending padding to origin circuit %u.",
+           TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier);
+  } else {
+    // If we're a non-origin circ, we can just send from here as if we're the
+    // edge.
+    log_fn(LOG_INFO,LD_CIRC,
+          "Callback: Sending padding to non-origin circuit.");
+    relay_send_command_from_edge(0, mi->on_circ, RELAY_COMMAND_DROP, NULL,
+                                 0, NULL);
+  }
+
+  rep_hist_padding_count_write(PADDING_TYPE_DROP);
+  /* This is a padding cell sent from the client or from the middle node,
+   * (because it's invoked from circuitpadding.c) */
+  circpad_cell_event_padding_sent(circ);
+
+  /* The circpad_cell_event_padding_sent() could cause us to transition.
+   * So stop using mi: look the machineinfo up again through the values
+   * saved at the top, and only then check our token supply. */
+  if (circ->padding_info[machine_idx] != NULL) {
+    if (state != circ->padding_info[machine_idx]->current_state)
+      return CIRCPAD_STATE_CHANGED;
+    else
+      return check_machine_token_supply(circ->padding_info[machine_idx]);
+  } else {
+    return CIRCPAD_STATE_CHANGED;
+  }
+}
+
+/**
+ * Timer callback, in the form tor's timer code expects, that fires when
+ * it is time to send a padding cell.
+ *
+ * <b>args</b> is the circpad_machine_state_t the timer belongs to. Timers
+ * are cancelled when the machineinfo is freed on a circuit, so the
+ * machineinfo always outlives its timer and no handles are needed.
+ */
+static void
+circpad_send_padding_callback(tor_timer_t *timer, void *args,
+                              const struct monotime_t *time)
+{
+  circpad_machine_state_t *mi = args;
+  (void)timer; (void)time;
+
+  if (!mi || !mi->on_circ) {
+    // This shouldn't happen (represents a timer leak)
+    log_fn(LOG_WARN,LD_CIRC,
+            "Circuit closed while waiting for padding timer.");
+    tor_fragile_assert();
+    return;
+  }
+  assert_circuit_ok(mi->on_circ);
+  circpad_send_padding_cell_for_callback(mi);
+
+  // TODO-MP-AP: Unify this counter with channelpadding for rephist stats
+  //total_timers_pending--;
+}
+
+/**
+ * Refresh our cached copies of the padding-related consensus parameters.
+ */
+void
+circpad_new_consensus_params(const networkstatus_t *ns)
+{
+  /* Padding cell count that must be reached before the global cap applies */
+  circpad_global_allowed_cells = networkstatus_get_param(ns,
+      "circpad_global_allowed_cells", 0, 0, UINT16_MAX-1);
+
+  /* Global cap on padding as a percentage of all cells; zero disables it */
+  circpad_global_max_padding_percent = networkstatus_get_param(ns,
+      "circpad_global_max_padding_pct", 0, 0, 100);
+}
+
+/**
+ * Check this machine against its padding limits, as well as global
+ * consensus limits.
+ *
+ * We have two limits: a percent and a cell count. The cell count
+ * limit must be reached before the percent is enforced (this is to
+ * optionally allow very light padding of things like circuit setup
+ * while there is no other traffic on the circuit).
+ *
+ * TODO: Don't apply limits to machines from torrc.
+ *
+ * Returns 1 if limits are set and we've hit them. Otherwise returns 0.
+ */
+STATIC bool
+circpad_machine_reached_padding_limit(circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  /* If the machine's max_padding_percent is non-zero, and we've sent
+   * at least the allowed count of padding cells, then check our
+   * percent limits for this machine. */
+  if (machine->max_padding_percent &&
+      mi->padding_sent >= machine->allowed_padding_count) {
+    uint32_t total_cells = mi->padding_sent + mi->nonpadding_sent;
+    /* Check the percent; skip it while no cells are counted (avoids x/0) */
+    if (total_cells > 0 &&
+        (100*(uint32_t)mi->padding_sent) / total_cells >
+        machine->max_padding_percent) {
+      return 1; // limit is reached. Stop.
+    }
+  }
+
+  /* If circpad_global_max_padding_percent is non-zero, and we've
+   * sent at least the global allowed cell count, then check our
+   * global tor process percentage limit on padding. */
+  if (circpad_global_max_padding_percent &&
+      circpad_global_padding_sent >= circpad_global_allowed_cells) {
+    uint64_t total_cells = circpad_global_padding_sent +
+              circpad_global_nonpadding_sent;
+    /* Check the percent; skip it while no cells are counted (avoids x/0) */
+    if (total_cells > 0 &&
+        (100*circpad_global_padding_sent) / total_cells >
+        circpad_global_max_padding_percent) {
+      return 1; // global limit reached. Stop.
+    }
+  }
+
+  return 0; // All good!
+}
+
+/**
+ * Schedule the next padding time according to the machineinfo on a
+ * circuit.
+ *
+ * The histograms represent inter-packet-delay. Whenever you get a packet
+ * event you should be scheduling your next timer (after cancelling any old
+ * ones and updating tokens accordingly).
+ *
+ * Returns the decision of the infinity event, or of a padding cell sent
+ * immediately, when one of those applies; CIRCPAD_STATE_UNCHANGED otherwise.
+ */
+MOCK_IMPL(circpad_decision_t,
+circpad_machine_schedule_padding,(circpad_machine_state_t *mi))
+{
+  circpad_delay_t in_usec = 0;
+  struct timeval timeout;
+  tor_assert(mi);
+
+  // Don't pad in end (but also don't cancel any previously
+  // scheduled padding either).
+  if (mi->current_state == CIRCPAD_STATE_END) {
+    log_fn(LOG_INFO, LD_CIRC, "Padding end state");
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  /* Check our padding limits; once hit, schedule nothing new */
+  if (circpad_machine_reached_padding_limit(mi)) {
+   if (CIRCUIT_IS_ORIGIN(mi->on_circ)) {
+      log_fn(LOG_INFO, LD_CIRC,
+           "Padding machine has reached padding limit on circuit %u",
+             TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier);
+    } else {
+      log_fn(LOG_INFO, LD_CIRC,
+           "Padding machine has reached padding limit on circuit %"PRIu64
+           ", %d",
+           mi->on_circ->n_chan ? mi->on_circ->n_chan->global_identifier : 0,
+           mi->on_circ->n_circ_id);
+    }
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  if (mi->is_padding_timer_scheduled) {
+    /* Cancel current timer (if any) */
+    timer_disable(mi->padding_timer);
+    mi->is_padding_timer_scheduled = 0;
+  }
+
+  /* in_usec = delay until the padding cell, in microseconds */
+  in_usec = circpad_machine_sample_delay(mi);
+  mi->padding_scheduled_at_usec = monotime_absolute_usec();
+  log_fn(LOG_INFO,LD_CIRC,"\tPadding in %u usec", in_usec);
+
+  // Don't schedule if we have infinite delay; raise the infinity event.
+  if (in_usec == CIRCPAD_DELAY_INFINITE) {
+    return circpad_internal_event_infinity(mi);
+  }
+
+  if (mi->state_length == 0) {
+    /* If we're at length 0, that means we hit 0 after sending
+     * a cell earlier, and emitted an event for it, but
+     * for whatever reason we did not decide to change states then.
+     * So maybe the machine is waiting for bins empty, or for an
+     * infinity event later? That would be a strange machine,
+     * but there's no reason to make it impossible. */
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  if (in_usec <= 0) { /* no delay: send now rather than arm a timer */
+    return circpad_send_padding_cell_for_callback(mi);
+  }
+
+  timeout.tv_sec = in_usec/TOR_USEC_PER_SEC;
+  timeout.tv_usec = (in_usec%TOR_USEC_PER_SEC);
+
+  log_fn(LOG_INFO, LD_CIRC, "\tPadding in %u sec, %u usec",
+          (unsigned)timeout.tv_sec, (unsigned)timeout.tv_usec);
+
+  if (mi->padding_timer) {
+    timer_set_cb(mi->padding_timer, circpad_send_padding_callback, mi);
+  } else {
+    mi->padding_timer =
+        timer_new(circpad_send_padding_callback, mi);
+  }
+  timer_schedule(mi->padding_timer, &timeout);
+  mi->is_padding_timer_scheduled = 1;
+
+  // TODO-MP-AP: Unify with channelpadding counter
+  //rep_hist_padding_count_timers(++total_timers_pending);
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/**
+ * If the machine transitioned to the END state, we need
+ * to check to see if it wants us to shut it down immediately.
+ * If it does, then we need to send the appropriate negotiation commands
+ * depending on which side it is.
+ *
+ * After this function is called, mi may point to freed memory. Do
+ * not access it.
+ */
+static void
+circpad_machine_spec_transitioned_to_end(circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  /*
+   * We allow machines to shut down and delete themselves as opposed
+   * to just going back to START or waiting forever in END so that
+   * we can handle the case where this machine started while it was
+   * the only machine that matched conditions, but *since* then more
+   * "higher ranking" machines now match the conditions, and would
+   * be given a chance to take precedence over this one in
+   * circpad_add_matching_machines().
+   *
+   * Returning to START or waiting forever in END would not give those
+   * other machines a chance to be launched, whereas shutting down
+   * here does.
+   */
+  if (machine->should_negotiate_end) {
+    circuit_t *on_circ = mi->on_circ;
+    if (machine->is_origin_side) {
+      /* We free the machine info here so that we can be replaced
+       * by a different machine. But we must leave the padding_machine
+       * in place to wait for the negotiated response */
+      circpad_circuit_machineinfo_free_idx(on_circ,
+                                           machine->machine_index);
+      circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(on_circ),
+                                machine->machine_num,
+                                machine->target_hopnum,
+                                CIRCPAD_COMMAND_STOP);
+    } else {
+      circpad_circuit_machineinfo_free_idx(on_circ,
+                                           machine->machine_index);
+      circpad_padding_negotiated(on_circ,
+                                machine->machine_num,
+                                CIRCPAD_COMMAND_STOP,
+                                CIRCPAD_RESPONSE_OK);
+      on_circ->padding_machine[machine->machine_index] = NULL;
+    }
+  }
+}
+
+/**
+ * Generic state transition function for padding state machines.
+ *
+ * Given an event and our mutable machine info, decide if/how to
+ * transition to a different state, and perform actions accordingly.
+ *
+ * Returns CIRCPAD_STATE_CHANGED if we transition states, else UNCHANGED.
+ */
+MOCK_IMPL(circpad_decision_t,
+circpad_machine_spec_transition,(circpad_machine_state_t *mi,
+                            circpad_event_t event))
+{
+  const circpad_state_t *state =
+      circpad_machine_current_state(mi);
+
+  /* If state is null we are in the end state. */
+  if (!state) {
+    /* If we are in the end state we don't pad no matter what. */
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  /* Check if this event is ignored or causes a cancel */
+  if (state->next_state[event] == CIRCPAD_STATE_IGNORE) {
+    return CIRCPAD_STATE_UNCHANGED;
+  } else if (state->next_state[event] == CIRCPAD_STATE_CANCEL) {
+    /* Check cancel events and cancel any pending padding */
+    mi->padding_scheduled_at_usec = 0;
+    if (mi->is_padding_timer_scheduled) {
+      mi->is_padding_timer_scheduled = 0;
+      /* Cancel current timer (if any) */
+      timer_disable(mi->padding_timer);
+    }
+    return CIRCPAD_STATE_UNCHANGED;
+  } else {
+    circpad_statenum_t s = state->next_state[event];
+    /* See if we need to transition to any other states based on this event.
+     * Whenever a transition happens, even to our own state, we schedule
+     * padding.
+     *
+     * So if a state only wants to schedule padding for an event, it specifies
+     * a transition to itself. All non-specified events are ignored.
+     */
+    log_fn(LOG_INFO, LD_CIRC,
+           "Circpad machine %d transitioning from %s to %s",
+            mi->machine_index, circpad_state_to_string(mi->current_state),
+            circpad_state_to_string(s));
+
+    /* If this is not the same state, switch and init tokens,
+     * otherwise just reschedule padding. */
+    if (mi->current_state != s) {
+      mi->current_state = s;
+      circpad_machine_setup_tokens(mi);
+      circpad_choose_state_length(mi);
+
+      /* If we transition to the end state, check to see
+       * if this machine wants to be shut down at end */
+      if (s == CIRCPAD_STATE_END) {
+        circpad_machine_spec_transitioned_to_end(mi);
+        /* We transitioned but we don't pad in end. Also, mi
+         * may be freed. Returning STATE_CHANGED prevents us
+         * from accessing it in any callers of this function. */
+        return CIRCPAD_STATE_CHANGED;
+      }
+
+      /* We transitioned to a new state, schedule padding */
+      circpad_machine_schedule_padding(mi);
+      return CIRCPAD_STATE_CHANGED;
+    }
+
+    /* We transitioned back to the same state. Schedule padding,
+     * and inform if that causes a state transition. */
+    return circpad_machine_schedule_padding(mi);
+  }
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/**
+ * Record the "receive" half of the middle-hop RTT measurement for the
+ * part of the circuit between this hop and its far end.
+ *
+ * A middle hop first "receives" a cell from the origin and relays it
+ * toward the exit; some time later the answer comes back and is "sent"
+ * toward the origin. The gap between those two moments is the RTT we
+ * are after, which is later used for synthetic response delays. This
+ * function only notes the receive timestamp; the send-side function
+ * turns it into an estimate.
+ */
+static void
+circpad_estimate_circ_rtt_on_received(circuit_t *circ,
+                                      circpad_machine_state_t *mi)
+{
+  /* Only non-origin circuits measure RTT, and only until told to stop;
+   * origin circuits have no use for it in their delay calculations. */
+  if (CIRCUIT_IS_ORIGIN(circ) || mi->stop_rtt_update)
+    return;
+
+  /* Nothing outstanding: timestamp this cell and wait for the reply */
+  if (!mi->last_received_time_usec) {
+    mi->last_received_time_usec = monotime_absolute_usec();
+    return;
+  }
+
+  /* A timestamp is already pending, so this cell arrived before any
+   * response to the previous one. An RTT estimate is only meaningful
+   * with a single packet on the wire, so back-to-back packets normally
+   * end estimation. The timestamp is left in place, though: for the
+   * first such burst the send side can still measure up to the very
+   * first response (e.g. the reply to optimistic data), which is why
+   * stop_rtt_update is consulted here and not on the send side.
+   *
+   * Back-to-back packets are tolerated while the circuit is not yet
+   * open, to handle var cells.
+   * XXX: Will this work with our var cell plans? Maybe not, since we're
+   * opened at the middle hop as soon as we process one var extend2 :/ */
+  if (circ->state != CIRCUIT_STATE_OPEN)
+    return;
+
+  log_fn(LOG_INFO, LD_CIRC,
+       "Stopping padding RTT estimation on circuit (%"PRIu64
+       ", %d) after two back to back packets. Current RTT: %d",
+       circ->n_chan ?  circ->n_chan->global_identifier : 0,
+       circ->n_circ_id, mi->rtt_estimate_usec);
+  mi->stop_rtt_update = 1;
+}
+
+/**
+ * Handles the "send" side of RTT calculation at middle nodes.
+ *
+ * This function calculates the RTT from the middle to the end
+ * of the circuit by subtracting the last received cell timestamp
+ * from the current time. It allows back-to-back cells until
+ * the circuit is opened, to allow for var cell handshakes.
+ * XXX: Check our var cell plans to make sure this will work.
+ */
+static void
+circpad_estimate_circ_rtt_on_send(circuit_t *circ,
+                                  circpad_machine_state_t *mi)
+{
+  /* Origin circuits don't estimate RTT. They could do it easily enough,
+   * but they have no reason to use it in any delay calculations. */
+  if (CIRCUIT_IS_ORIGIN(circ))
+    return;
+
+  /* If last_received_time_usec is non-zero, we are waiting for a response
+   * from the exit side. Calculate the time delta and use it as RTT. */
+  if (mi->last_received_time_usec) {
+    const uint64_t now_usec = monotime_absolute_usec();
+    const uint64_t received_usec = mi->last_received_time_usec;
+    const circpad_time_t rtt_time = now_usec - received_usec;
+
+    /* Reset the last RTT packet time, so we can tell if two cells
+     * arrive back to back. The warning below uses the saved copies. */
+    mi->last_received_time_usec = 0;
+
+    /* Use INT32_MAX to ensure the addition doesn't overflow */
+    if (rtt_time >= INT32_MAX) {
+      log_fn(LOG_WARN,LD_CIRC,
+             "Circuit padding RTT estimate overflowed: %"PRIu64
+             " vs %"PRIu64, now_usec, received_usec);
+      return;
+    }
+
+    /* If the old RTT estimate is lower than this one, use this one, because
+     * the circuit is getting longer. If this estimate is somehow
+     * faster than the previous, then maybe that was network jitter.
+     * In that case, average them. */
+    if (mi->rtt_estimate_usec < (circpad_delay_t)rtt_time) {
+      mi->rtt_estimate_usec = (circpad_delay_t)rtt_time;
+    } else {
+      mi->rtt_estimate_usec += (circpad_delay_t)rtt_time;
+      mi->rtt_estimate_usec /= 2;
+    }
+  } else if (circ->state == CIRCUIT_STATE_OPEN) {
+    /* If last_received_time_usec is zero, then we have gotten two cells back
+     * to back. Stop estimating RTT in this case. Note that we only
+     * stop RTT update if the circuit is opened, to allow for RTT estimates
+     * of var cells during circ setup. */
+    mi->stop_rtt_update = 1;
+
+    if (!mi->rtt_estimate_usec) {
+      log_fn(LOG_NOTICE, LD_CIRC,
+             "Got two cells back to back on a circuit before estimating RTT.");
+    }
+  }
+}
+
+/**
+ * A "non-padding" cell has been sent from this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we sent a cell into the network.
+ * For middle relay circuits, this means we sent a cell towards the
+ * origin.
+ *
+ * Increments the global non-padding-sent counter, and then, for each
+ * active machine on <b>on_circ</b>: updates the RTT estimate, removes a
+ * token, and, if the token removal did not report a transition, delivers
+ * CIRCPAD_EVENT_NONPADDING_SENT to the machine.
+ */
+void
+circpad_cell_event_nonpadding_sent(circuit_t *on_circ)
+{
+  /* Update global cell count */
+  circpad_global_nonpadding_sent++;
+
+  /* If there are no machines then this loop should not iterate */
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    /* First, update any RTT estimate */
+    circpad_estimate_circ_rtt_on_send(on_circ, on_circ->padding_info[i]);
+
+    /* Remove a token: this is the idea of adaptive padding, since we have an
+     * ideal distribution that we want our distribution to look like. */
+    if (!circpad_machine_remove_token(on_circ->padding_info[i])) {
+      /* If removing a token did not cause a transition, check if
+       * non-padding sent event should */
+      circpad_machine_spec_transition(on_circ->padding_info[i],
+                                 CIRCPAD_EVENT_NONPADDING_SENT);
+    }
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A "non-padding" cell has been received by this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we read a cell from the network.
+ * For middle relay circuits, this means we received a cell from the
+ * origin.
+ *
+ * For each active machine on <b>on_circ</b>, this first lets the RTT
+ * estimator observe the received cell, and then delivers
+ * CIRCPAD_EVENT_NONPADDING_RECV to the machine.
+ */
+void
+circpad_cell_event_nonpadding_received(circuit_t *on_circ)
+{
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    /* First, update any RTT estimate */
+    circpad_estimate_circ_rtt_on_received(on_circ, on_circ->padding_info[i]);
+
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                               CIRCPAD_EVENT_NONPADDING_RECV);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A padding cell has been sent from this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we sent a cell into the network.
+ * For middle relay circuits, this means we sent a cell towards the
+ * origin.
+ *
+ * Delivers CIRCPAD_EVENT_PADDING_SENT to every active machine on
+ * <b>on_circ</b>. No RTT or token accounting is done here.
+ */
+void
+circpad_cell_event_padding_sent(circuit_t *on_circ)
+{
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                             CIRCPAD_EVENT_PADDING_SENT);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A padding cell has been received by this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we read a cell from the network.
+ * For middle relay circuits, this means we received a cell from the
+ * origin.
+ *
+ * Delivers CIRCPAD_EVENT_PADDING_RECV to every active machine on
+ * <b>on_circ</b>.
+ */
+void
+circpad_cell_event_padding_received(circuit_t *on_circ)
+{
+  /* Same loop as the padding-sent handler, but with the RECV event. */
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                              CIRCPAD_EVENT_PADDING_RECV);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * An "infinite" delay has been chosen from one of our histograms.
+ *
+ * "Infinite" delays mean don't send padding -- but they can also
+ * mean transition to another state depending on the state machine
+ * definitions. Check the rules and react accordingly.
+ *
+ * Return 1 if we decide to transition, 0 otherwise. The value is
+ * whatever circpad_machine_spec_transition() returns for
+ * CIRCPAD_EVENT_INFINITY.
+ */
+circpad_decision_t
+circpad_internal_event_infinity(circpad_machine_state_t *mi)
+{
+  return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_INFINITY);
+}
+
+/**
+ * Every bin of the current state's histogram has run out of tokens.
+ *
+ * Deliver CIRCPAD_EVENT_BINS_EMPTY to the machine. If that does not
+ * move the machine to another state, refill the tokens so the current
+ * state can keep going.
+ *
+ * Returns CIRCPAD_STATE_CHANGED if the event caused a transition, and
+ * CIRCPAD_STATE_UNCHANGED (after refilling the tokens) otherwise.
+ */
+circpad_decision_t
+circpad_internal_event_bins_empty(circpad_machine_state_t *mi)
+{
+  circpad_decision_t decision =
+    circpad_machine_spec_transition(mi, CIRCPAD_EVENT_BINS_EMPTY);
+
+  if (decision != CIRCPAD_STATE_CHANGED) {
+    /* No transition happened: stay in this state with a fresh set of
+     * tokens. */
+    circpad_machine_setup_tokens(mi);
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  return CIRCPAD_STATE_CHANGED;
+}
+
+/**
+ * This state has used up its cell count. Emit the event and
+ * see if we transition.
+ *
+ * Return 1 if we decide to transition, 0 otherwise. The value is
+ * whatever circpad_machine_spec_transition() returns for
+ * CIRCPAD_EVENT_LENGTH_COUNT.
+ */
+circpad_decision_t
+circpad_internal_event_state_length_up(circpad_machine_state_t *mi)
+{
+  return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT);
+}
+
+/**
+ * Decide whether <b>machine</b> may be applied to <b>circ</b>.
+ *
+ * All of the machine's conditions must hold: the circuit purpose must be
+ * in the machine's purpose mask; if the machine requires vanguards, one
+ * of HSLayer2Nodes or HSLayer3Nodes must be set; at least one bit of the
+ * circuit's padding state must be in the machine's state mask; and the
+ * number of opened hops must be at least the machine's min_hops.
+ *
+ * Returns true if every condition holds, false otherwise.
+ */
+static inline bool
+circpad_machine_conditions_met(origin_circuit_t *circ,
+                               const circpad_machine_spec_t *machine)
+{
+  const circpad_purpose_mask_t purpose_bit =
+    circpad_circ_purpose_to_mask(TO_CIRCUIT(circ)->purpose);
+
+  if ((purpose_bit & machine->conditions.purpose_mask) == 0)
+    return 0;
+
+  if (machine->conditions.requires_vanguards) {
+    const or_options_t *opts = get_options();
+
+    /* Pinned middles are effectively vanguards */
+    if (!opts->HSLayer2Nodes && !opts->HSLayer3Nodes)
+      return 0;
+  }
+
+  /* Any overlap between the circuit's state bits and the machine's state
+   * mask is enough. This lets a machine say "streams or no streams",
+   * "only with streams", or "only without streams" through its mask. */
+  if ((circpad_circuit_state(circ) & machine->conditions.state_mask) == 0)
+    return 0;
+
+  /* Last condition: enough hops must already be opened. */
+  return circuit_get_cpath_opened_len(circ) >= machine->conditions.min_hops;
+}
+
+/**
+ * Summarize the circuit state as the padding code sees it.
+ *
+ * The padding code only cares whether the circuit has streams, whether
+ * it has ever opened, and whether it still has relay early cells left.
+ * Exactly one bit from each of those three pairs is set in the returned
+ * bitmask.
+ */
+static inline
+circpad_circuit_state_t
+circpad_circuit_state(origin_circuit_t *circ)
+{
+  circpad_circuit_state_t mask = 0;
+
+  mask |= circ->p_streams ? CIRCPAD_CIRC_STREAMS
+                          : CIRCPAD_CIRC_NO_STREAMS;
+
+  /* We use has_opened to prevent cannibalized circs from flapping. */
+  mask |= circ->has_opened ? CIRCPAD_CIRC_OPENED
+                           : CIRCPAD_CIRC_BUILDING;
+
+  mask |= (circ->remaining_relay_early_cells > 0)
+            ? CIRCPAD_CIRC_HAS_RELAY_EARLY
+            : CIRCPAD_CIRC_HAS_NO_RELAY_EARLY;
+
+  return mask;
+}
+
+/**
+ * Convert a normal circuit purpose into a bitmask that we can
+ * use for determining matching circuits.
+ *
+ * Origin purposes are mapped to bit positions counted from the first
+ * purpose after CIRCUIT_PURPOSE_OR_MAX_. Returns 0 for OR purposes and
+ * CIRCPAD_PURPOSE_ALL for purposes that do not fit in a 32-bit mask;
+ * both cases are flagged with BUG().
+ */
+static inline
+circpad_purpose_mask_t
+circpad_circ_purpose_to_mask(uint8_t circ_purpose)
+{
+  int bit_pos;
+
+  /* Treat OR circ purposes as ignored. They should not be passed here*/
+  if (BUG(circ_purpose <= CIRCUIT_PURPOSE_OR_MAX_)) {
+    return 0;
+  }
+
+  bit_pos = circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1;
+
+  /* Treat new client circuit purposes as "OMG ITS EVERYTHING".
+   * This also should not happen. The bound is >= 32 because shifting a
+   * 32-bit value by 32 or more is undefined behavior. */
+  if (BUG(bit_pos >= 32)) {
+    return CIRCPAD_PURPOSE_ALL;
+  }
+
+  /* Convert the purpose to a bit position. Shift an unsigned 32-bit one
+   * so that bit 31 does not overflow a signed int. */
+  return UINT32_C(1) << bit_pos;
+}
+
+/**
+ * Shut down any machines whose conditions no longer match
+ * the current circuit.
+ *
+ * For every active machine on <b>on_circ</b> whose conditions are no
+ * longer met, free its machine info and send a padding negotiate STOP
+ * command to the machine's target hop. The padding_machine[] entry is
+ * still read after the info is freed and is not cleared here. The
+ * result of the negotiate call is ignored.
+ */
+static void
+circpad_shutdown_old_machines(origin_circuit_t *on_circ)
+{
+  circuit_t *circ = TO_CIRCUIT(on_circ);
+
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, circ) {
+    if (!circpad_machine_conditions_met(on_circ,
+                                        circ->padding_machine[i])) {
+      // Clear machineinfo (frees timers)
+      circpad_circuit_machineinfo_free_idx(circ, i);
+      // Send padding negotiate stop
+      circpad_negotiate_padding(on_circ,
+                                circ->padding_machine[i]->machine_num,
+                                circ->padding_machine[i]->target_hopnum,
+                                CIRCPAD_COMMAND_STOP);
+    }
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Negotiate new machines that would apply to this circuit.
+ *
+ * This function checks to see if we have any free machine indexes,
+ * and for each free machine index, it initializes the most recently
+ * added origin-side padding machine that matches the target machine
+ * index and circuit conditions, and negotiates it with the appropriate
+ * middle relay.
+ *
+ * Does nothing if padding negotiation has already failed on this
+ * circuit. If sending a negotiate cell fails, the machine that was just
+ * set up is torn down again and the circuit is marked as having failed
+ * negotiation.
+ */
+static void
+circpad_add_matching_machines(origin_circuit_t *on_circ)
+{
+  circuit_t *circ = TO_CIRCUIT(on_circ);
+
+#ifdef TOR_UNIT_TESTS
+  /* Tests don't have to init our padding machines */
+  if (!origin_padding_machines)
+    return;
+#endif
+
+  /* If padding negotiation failed before, do not try again */
+  if (on_circ->padding_negotiation_failed)
+    return;
+
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    /* If there is a padding machine info, this index is occupied.
+     * No need to check conditions for this index. */
+    if (circ->padding_info[i])
+      continue;
+
+    /* We have a free machine index. Check the origin padding
+     * machines in reverse order, so that more recently added
+     * machines take priority over older ones. */
+    SMARTLIST_FOREACH_REVERSE_BEGIN(origin_padding_machines,
+                                    circpad_machine_spec_t *,
+                                    machine) {
+      /* Machine definitions have a specific target machine index.
+       * This is so event ordering is deterministic with respect
+       * to which machine gets events first when there are two
+       * machines installed on a circuit. Make sure we only
+       * add this machine if its target machine index is free. */
+      if (machine->machine_index == i &&
+          circpad_machine_conditions_met(on_circ, machine)) {
+
+        // We can only replace this machine if the target hopnum
+        // is the same, otherwise we'll get invalid data
+        if (circ->padding_machine[i]) {
+          if (circ->padding_machine[i]->target_hopnum !=
+              machine->target_hopnum)
+            continue;
+          /* Replace it. (Don't free - is global). */
+          circ->padding_machine[i] = NULL;
+        }
+
+        /* Set up the machine immediately so that the slot is occupied.
+         * We will tear it down on error return, or if there is an error
+         * response from the relay. */
+        circpad_setup_machine_on_circ(circ, machine);
+        if (circpad_negotiate_padding(on_circ, machine->machine_num,
+                                  machine->target_hopnum,
+                                  CIRCPAD_COMMAND_START) < 0) {
+          circpad_circuit_machineinfo_free_idx(circ, i);
+          circ->padding_machine[i] = NULL;
+          on_circ->padding_negotiation_failed = 1;
+        } else {
+          /* Success. Don't try any more machines on this index, but
+           * keep going so that other free indexes can be filled too.
+           * (Returning here would stop after the first machine.) */
+          break;
+        }
+      }
+    } SMARTLIST_FOREACH_END(machine);
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Event that tells us we added a hop to an origin circuit.
+ *
+ * This event is used to decide if we should create a padding machine
+ * on a circuit. Unlike the other circuit events, it only tries to add
+ * machines; it never shuts any down.
+ */
+void
+circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ)
+{
+  /* Since our padding conditions do not specify a max_hops,
+   * all we can do is add machines here */
+  circpad_add_matching_machines(on_circ);
+}
+
+/**
+ * Event that tells us that an origin circuit is now built.
+ *
+ * Shut down any machines that only applied to un-built circuits.
+ * Activate any new ones.
+ */
+void
+circpad_machine_event_circ_built(origin_circuit_t *circ)
+{
+  /* Shut down first, so that indexes freed by machines that stopped
+   * matching are available when we look for new ones. */
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Circuit purpose changed event.
+ *
+ * Shut down any machines that don't apply to our circ purpose.
+ * Activate any new ones that do.
+ */
+void
+circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ)
+{
+  /* Shut down first, so that indexes freed by machines that stopped
+   * matching are available when we look for new ones. */
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Event that tells us that an origin circuit is out of RELAY_EARLY
+ * cells.
+ *
+ * Shut down any machines that only applied to RELAY_EARLY circuits.
+ * Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ)
+{
+  /* Shut down first, so that indexes freed by machines that stopped
+   * matching are available when we look for new ones. */
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Streams attached event.
+ *
+ * Called from link_apconn_to_circ() and handle_hs_exit_conn()
+ *
+ * Shut down any machines that only applied to circuits without
+ * streams. Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_streams(origin_circuit_t *circ)
+{
+  /* Shut down first, so that indexes freed by machines that stopped
+   * matching are available when we look for new ones. */
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Streams detached event.
+ *
+ * Called from circuit_detach_stream()
+ *
+ * Shut down any machines whose conditions no longer match now that the
+ * circuit has no streams (for example, machines that only applied to
+ * circuits with streams). Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ)
+{
+  /* Shut down first, so that indexes freed by machines that stopped
+   * matching are available when we look for new ones. */
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Check that padding arrived from a hop we negotiated padding with.
+ *
+ * Returns true if <b>from_hop</b> is the target hop of one of the
+ * padding machines on <b>circ</b>.
+ *
+ * Returns false if <b>circ</b> is not an origin circuit, or if no
+ * padding machine on it targets <b>from_hop</b>.
+ */
+bool
+circpad_padding_is_from_expected_hop(circuit_t *circ,
+                                     crypt_path_t *from_hop)
+{
+  if (!CIRCUIT_IS_ORIGIN(circ))
+    return 0;
+
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    /* Look at padding_machine rather than padding_info/active machines:
+     * padding may still arrive after a machine was shut down. Its info
+     * is gone by then, but padding_machine stays in place until the
+     * padding_negotiated response comes back. */
+    if (circ->padding_machine[i]) {
+      crypt_path_t *expected_hop =
+        circuit_get_cpath_hop(TO_ORIGIN_CIRCUIT(circ),
+                              circ->padding_machine[i]->target_hopnum);
+
+      if (expected_hop == from_hop)
+        return 1;
+    }
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+
+  return 0;
+}
+
+/**
+ * Deliver circpad events for an "unrecognized cell".
+ *
+ * Unrecognized cells are cells that a relay forwards on to the next hop
+ * of the circuit, so by definition they are not padding. Relay-side
+ * state machines still need to hear that a non-padding cell was sent or
+ * received (depending on <b>dir</b>), so that they can update their
+ * histograms and decide whether to pad.
+ *
+ * Does nothing on origin circuits, or if <b>dir</b> is neither
+ * CELL_DIRECTION_IN nor CELL_DIRECTION_OUT.
+ */
+void
+circpad_deliver_unrecognized_cell_events(circuit_t *circ,
+                                         cell_direction_t dir)
+{
+  // We should never see unrecognized cells at origin.
+  // Our caller emits a warn when this happens.
+  if (CIRCUIT_IS_ORIGIN(circ))
+    return;
+
+  if (dir == CELL_DIRECTION_IN) {
+    /* Inbound on a non-origin circuit: the cell is moving towards the
+     * origin, so we are relaying (sending) a non-padding cell. */
+    circpad_cell_event_nonpadding_sent(circ);
+  } else if (dir == CELL_DIRECTION_OUT) {
+    /* Outbound (away from origin): we received a non-padding cell
+     * that came from the origin. */
+    circpad_cell_event_nonpadding_received(circ);
+  }
+}
+
+/**
+ * Deliver circpad events for "recognized" relay cells.
+ *
+ * Recognized cells are destined for this hop, either client or middle.
+ * Work out whether the cell is padding (RELAY_COMMAND_DROP) or not, and
+ * deliver the appropriate received event. Padding negotiation cells
+ * produce no event, and neither does padding that reaches an origin
+ * circuit from a hop we did not expect padding from.
+ */
+void
+circpad_deliver_recognized_relay_cell_events(circuit_t *circ,
+                                             uint8_t relay_command,
+                                             crypt_path_t *layer_hint)
+{
+  /* Padding negotiate cells are ignored by the state machines
+   * for simplicity. */
+  if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE ||
+      relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) {
+    return;
+  }
+
+  /* Anything other than RELAY_COMMAND_DROP is a non-padding cell
+   * received on the edge. */
+  if (relay_command != RELAY_COMMAND_DROP) {
+    circpad_cell_event_nonpadding_received(circ);
+    return;
+  }
+
+  rep_hist_padding_count_read(PADDING_TYPE_DROP);
+
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    /* This is unexpected padding. Ignore it for now. */
+    if (!circpad_padding_is_from_expected_hop(circ, layer_hint))
+      return;
+
+    circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), 0);
+  }
+
+  /* The cell has been recognized, so we are its destination and it is a
+   * padding cell meant for us. Because of leaky-pipe, "us" may be either
+   * the client or the middle node. */
+  circpad_cell_event_padding_received(circ);
+  log_fn(LOG_INFO, LD_CIRC, "Got padding cell on %s circuit %u.",
+         CIRCUIT_IS_ORIGIN(circ) ? "origin" : "non-origin",
+         CIRCUIT_IS_ORIGIN(circ) ?
+           TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0);
+}
+
+/**
+ * Deliver circpad events for relay cells sent from us.
+ *
+ * Only non-padding cells produce an event here. Padding negotiation
+ * cells are ignored, and RELAY_COMMAND_DROP cells are skipped because
+ * their event is delivered elsewhere.
+ */
+void
+circpad_deliver_sent_relay_cell_events(circuit_t *circ,
+                                       uint8_t relay_command)
+{
+  /* Padding negotiate cells are ignored by the state machines
+   * for simplicity. */
+  const bool is_negotiation =
+    relay_command == RELAY_COMMAND_PADDING_NEGOTIATE ||
+    relay_command == RELAY_COMMAND_PADDING_NEGOTIATED;
+
+  /* RELAY_COMMAND_DROP is the multi-hop (aka circuit-level) padding cell in
+   * tor. (CELL_PADDING is a channel-level padding cell, which is not relayed
+   * or processed here.)
+   *
+   * Optimization: The event for RELAY_COMMAND_DROP is sent directly
+   * from circpad_send_padding_cell_for_callback(). This is to avoid
+   * putting a cell_t and a relay_header_t on the stack repeatedly
+   * if we decide to send a long train of padding cells back-to-back
+   * with 0 delay. So we do nothing for it here. */
+  const bool is_padding = (relay_command == RELAY_COMMAND_DROP);
+
+  if (is_negotiation || is_padding)
+    return;
+
+  /* This is a non-padding cell sent from the client or from
+   * this node. */
+  circpad_cell_event_nonpadding_sent(circ);
+}
+
+/**
+ * Allocate and initialize the states array of a circpad machine.
+ *
+ * <b>num_states</b> is clamped to CIRCPAD_MAX_MACHINE_STATES (with a
+ * BUG() warning) if it is larger. The array is zero-filled, and every
+ * event in every state is then set to CIRCPAD_STATE_IGNORE.
+ */
+void
+circpad_machine_states_init(circpad_machine_spec_t *machine,
+                            circpad_statenum_t num_states)
+{
+  if (BUG(num_states > CIRCPAD_MAX_MACHINE_STATES)) {
+    num_states = CIRCPAD_MAX_MACHINE_STATES;
+  }
+
+  machine->states = tor_malloc_zero(sizeof(circpad_state_t)*num_states);
+  machine->num_states = num_states;
+
+  /* Events that a machine definition does not mention are ignored, so
+   * start every transition out as "ignore". */
+  for (circpad_statenum_t s = 0; s < num_states; s++) {
+    circpad_state_t *state = &machine->states[s];
+
+    for (int e = 0; e < CIRCPAD_NUM_EVENTS; e++) {
+      state->next_state[e] = CIRCPAD_STATE_IGNORE;
+    }
+  }
+}
+
+/**
+ * Install <b>machine</b> on <b>on_circ</b> at the machine's own index.
+ *
+ * Refuses, with a warning, to put a non-origin machine on an origin
+ * circuit or an origin machine on a non-origin circuit. Otherwise
+ * allocates fresh machine info for the slot and records the machine
+ * spec there. The slot is expected to be empty; if it is not, a
+ * nonfatal assertion fires and the slot is overwritten anyway.
+ */
+static void
+circpad_setup_machine_on_circ(circuit_t *on_circ,
+                              const circpad_machine_spec_t *machine)
+{
+  const int idx = machine->machine_index;
+
+  if (CIRCUIT_IS_ORIGIN(on_circ) && !machine->is_origin_side) {
+    log_fn(LOG_WARN, LD_BUG,
+           "Can't set up non-origin machine on origin circuit!");
+    return;
+  }
+
+  if (!CIRCUIT_IS_ORIGIN(on_circ) && machine->is_origin_side) {
+    log_fn(LOG_WARN, LD_BUG,
+           "Can't set up origin machine on non-origin circuit!");
+    return;
+  }
+
+  tor_assert_nonfatal(on_circ->padding_machine[idx] == NULL);
+  tor_assert_nonfatal(on_circ->padding_info[idx] == NULL);
+
+  on_circ->padding_info[idx] = circpad_circuit_machineinfo_new(on_circ, idx);
+  on_circ->padding_machine[idx] = machine;
+}
+
+/* These padding machines are only used for tests pending #28634. */
+#ifdef TOR_UNIT_TESTS
+/**
+ * Build the test-only client-side circuit padding machine and append it
+ * to origin_padding_machines.
+ *
+ * The machine has a start state and a burst state. It targets hop 2 and
+ * applies to all origin purposes once two hops are opened.
+ */
+static void
+circpad_circ_client_machine_init(void)
+{
+  circpad_machine_spec_t *client
+      = tor_malloc_zero(sizeof(circpad_machine_spec_t));
+  circpad_state_t *start_st;
+  circpad_state_t *burst_st;
+
+  // XXX: Better conditions for merge.. Or disable this machine in
+  // merge?
+  client->conditions.min_hops = 2;
+  client->conditions.state_mask =
+      CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED|CIRCPAD_CIRC_HAS_RELAY_EARLY;
+  client->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+
+  client->target_hopnum = 2;
+  client->is_origin_side = 1;
+
+  /* Start, gap, burst */
+  circpad_machine_states_init(client, 3);
+  start_st = &client->states[CIRCPAD_STATE_START];
+  burst_st = &client->states[CIRCPAD_STATE_BURST];
+
+  /* Receiving a non-padding cell moves us from start to burst. */
+  start_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  /* Any received cell keeps us in the burst state. */
+  burst_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+  burst_st->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  /* If we are in burst state, and we send a non-padding cell, then we cancel
+     the timer for the next padding cell:
+     We dont want to send fake extends when actual extends are going on */
+  burst_st->next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL;
+
+  /* Running out of tokens ends the machine. */
+  burst_st->next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END;
+
+  burst_st->token_removal = CIRCPAD_TOKEN_REMOVAL_CLOSEST;
+
+  // FIXME: Tune this histogram
+  burst_st->histogram_len = 2;
+  burst_st->start_usec = 500;
+  burst_st->range_usec = 1000000;
+  /* We have 5 tokens in the histogram, which means that all circuits will look
+   * like they have 7 hops (since we start this machine after the second hop,
+   * and tokens are decremented for any valid hops, and fake extends are
+   * used after that -- 2+5==7). */
+  burst_st->histogram[0] = 5;
+  burst_st->histogram_total_tokens = 5;
+
+  client->machine_num = smartlist_len(origin_padding_machines);
+  smartlist_add(origin_padding_machines, client);
+}
+
+/**
+ * Build the test-only relay-side (responder) circuit padding machine
+ * and append it to relay_padding_machines.
+ *
+ * The machine has start, burst and gap states. In the burst state it
+ * waits for padding from the client. In the gap state it waits a delay
+ * drawn from a histogram before answering.
+ */
+static void
+circpad_circ_responder_machine_init(void)
+{
+  circpad_machine_spec_t *responder
+      = tor_malloc_zero(sizeof(circpad_machine_spec_t));
+  circpad_state_t *start_st;
+  circpad_state_t *burst_st;
+  circpad_state_t *gap_st;
+
+  /* Shut down the machine after we've sent enough packets */
+  responder->should_negotiate_end = 1;
+
+  /* The relay-side doesn't care what hopnum it is, but for consistency,
+   * let's match the client */
+  responder->target_hopnum = 2;
+  responder->is_origin_side = 0;
+
+  /* Start, gap, burst */
+  circpad_machine_states_init(responder, 3);
+  start_st = &responder->states[CIRCPAD_STATE_START];
+  burst_st = &responder->states[CIRCPAD_STATE_BURST];
+  gap_st = &responder->states[CIRCPAD_STATE_GAP];
+
+  /* These are the settings of the state machine. In the future we are
+     going to serialize this into the consensus or the torrc */
+
+  /* We transition to the burst state on padding receive and on non-padding
+   * receive */
+  start_st->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  start_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  /* Inside the burst state we _stay_ in the burst state when a non-padding
+   * is sent */
+  burst_st->next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  /* Inside the burst state we transition to the gap state when we receive a
+   * padding cell */
+  burst_st->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP;
+
+  /* These describe the padding characteristics when in burst state */
+
+  /* use_rtt_estimate tries to estimate how long padding cells take to go from
+     C->M, and uses that as the base of the histogram */
+  burst_st->use_rtt_estimate = 1;
+  /* The histogram is 2 bins: an empty one, and infinity */
+  burst_st->histogram_len = 2;
+  burst_st->start_usec = 5000;
+  burst_st->range_usec = 1000000;
+  /* During burst state we wait forever for padding to arrive.
+
+     We are waiting for a padding cell from the client to come in, so that we
+     respond, and we imitate what an extend looks like */
+  burst_st->histogram[0] = 0;
+  // Only infinity bin:
+  burst_st->histogram[1] = 1;
+  burst_st->histogram_total_tokens = 1;
+
+  /* From the gap state, we _stay_ in the gap state, when we receive padding
+   * or non padding */
+  gap_st->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP;
+  gap_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_GAP;
+
+  /* And from the gap state, we go to the end, when the bins are empty or a
+   * non-padding cell is sent */
+  gap_st->next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END;
+  gap_st->next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_END;
+
+  // FIXME: Tune this histogram
+
+  /* The gap state is the delay you wait after you receive a padding cell
+     before you send a padding response */
+  gap_st->use_rtt_estimate = 1;
+  gap_st->histogram_len = 6;
+  gap_st->start_usec = 5000;
+  gap_st->range_usec = 1000000;
+  gap_st->histogram[0] = 0;
+  gap_st->histogram[1] = 1;
+  gap_st->histogram[2] = 2;
+  gap_st->histogram[3] = 2;
+  gap_st->histogram[4] = 1;
+  /* Total number of tokens */
+  gap_st->histogram_total_tokens = 6;
+  gap_st->token_removal = CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC;
+
+  responder->machine_num = smartlist_len(relay_padding_machines);
+  smartlist_add(relay_padding_machines, responder);
+}
+#endif
+
+/**
+ * Initialize all of our padding machines.
+ *
+ * This is called at startup. It creates the global origin-side and
+ * relay-side machine lists. Loading machines from torrc and from the
+ * tor consensus is not implemented yet (see the TODO below); for now
+ * the only machines registered are the test machines, and only when
+ * built with TOR_UNIT_TESTS.
+ */
+void
+circpad_machines_init(void)
+{
+  /* Both lists are expected to be unset here; being called twice is
+   * flagged but not fatal. */
+  tor_assert_nonfatal(origin_padding_machines == NULL);
+  tor_assert_nonfatal(relay_padding_machines == NULL);
+
+  origin_padding_machines = smartlist_new();
+  relay_padding_machines = smartlist_new();
+
+  // TODO: Parse machines from consensus and torrc
+#ifdef TOR_UNIT_TESTS
+  circpad_circ_client_machine_init();
+  circpad_circ_responder_machine_init();
+#endif
+}
+
+/**
+ * Free our padding machines.
+ *
+ * For each of the origin-side and relay-side machine lists that exists,
+ * free every machine's states array, the machine itself, and then the
+ * list.
+ */
+void
+circpad_machines_free(void)
+{
+  if (origin_padding_machines) {
+    SMARTLIST_FOREACH_BEGIN(origin_padding_machines,
+                            circpad_machine_spec_t *, spec) {
+      tor_free(spec->states);
+      tor_free(spec);
+    } SMARTLIST_FOREACH_END(spec);
+    smartlist_free(origin_padding_machines);
+  }
+
+  if (relay_padding_machines) {
+    SMARTLIST_FOREACH_BEGIN(relay_padding_machines,
+                            circpad_machine_spec_t *, spec) {
+      tor_free(spec->states);
+      tor_free(spec);
+    } SMARTLIST_FOREACH_END(spec);
+    smartlist_free(relay_padding_machines);
+  }
+}
+
+/**
+ * Check the protover info of <b>node</b> to see if it supports padding.
+ *
+ * Returns the supports_padding flag from the node's routerstatus, or
+ * false if the node has no routerstatus.
+ */
+static bool
+circpad_node_supports_padding(const node_t *node)
+{
+  if (!node->rs) {
+    log_fn(LOG_INFO, LD_CIRC, "Empty routerstatus in padding check");
+    return 0;
+  }
+
+  log_fn(LOG_INFO, LD_CIRC, "Checking padding: %s",
+         node->rs->pv.supports_padding ? "supported" : "unsupported");
+  return node->rs->pv.supports_padding;
+}
+
+/**
+ * Look up the node_t for hop number <b>hop</b> of <b>circ</b>, counting
+ * from 1.
+ *
+ * Returns the node found by identity digest for that hop if the hop
+ * exists and is open. Otherwise returns NULL.
+ */
+static const node_t *
+circuit_get_nth_node(origin_circuit_t *circ, int hop)
+{
+  crypt_path_t *cpath_hop = circuit_get_cpath_hop(circ, hop);
+
+  if (!cpath_hop)
+    return NULL;
+
+  /* Only hops that have finished opening count. */
+  if (cpath_hop->state != CPATH_STATE_OPEN)
+    return NULL;
+
+  return node_get_by_id(cpath_hop->extend_info->identity_digest);
+}
+
+/**
+ * Return true if hop number <b>target_hopnum</b> of <b>circ</b>
+ * supports padding.
+ *
+ * Returns false if no node can be found for that hop.
+ */
+static bool
+circpad_circuit_supports_padding(origin_circuit_t *circ,
+                                 int target_hopnum)
+{
+  const node_t *target = circuit_get_nth_node(circ, target_hopnum);
+
+  return target ? circpad_node_supports_padding(target) : 0;
+}
+
+/**
+ * Send a padding negotiate request to a hop of an origin circuit.
+ *
+ * Builds a PADDING_NEGOTIATE payload carrying <b>command</b> and
+ * <b>machine</b>, and sends it to hop <b>target_hopnum</b> of
+ * <b>circ</b>.
+ *
+ * Returns -1 if the target hop does not support padding or if the
+ * request cannot be encoded. Otherwise returns the result of sending
+ * the command to the hop (documented as -1 on error, 0 on success).
+ */
+signed_error_t
+circpad_negotiate_padding(origin_circuit_t *circ,
+                          circpad_machine_num_t machine,
+                          uint8_t target_hopnum,
+                          uint8_t command)
+{
+  circpad_negotiate_t request;
+  cell_t cell;
+  ssize_t encoded_len;
+
+  /* The target hop must list support for padding in its ProtoVer
+   * fields. */
+  if (!circpad_circuit_supports_padding(circ, target_hopnum))
+    return -1;
+
+  memset(&request, 0, sizeof(circpad_negotiate_t));
+  memset(&cell, 0, sizeof(cell_t));
+  // This gets reset to RELAY_EARLY appropriately by
+  // relay_send_command_from_edge_. At least, it looks that way.
+  // QQQ-MP-AP: Verify that.
+  cell.command = CELL_RELAY;
+
+  circpad_negotiate_set_command(&request, command);
+  circpad_negotiate_set_version(&request, 0);
+  circpad_negotiate_set_machine_type(&request, machine);
+
+  encoded_len = circpad_negotiate_encode(cell.payload, CELL_PAYLOAD_SIZE,
+                                         &request);
+  if (encoded_len < 0)
+    return -1;
+
+  log_fn(LOG_INFO,LD_CIRC, "Negotiating padding on circuit %u",
+         circ->global_identifier);
+
+  return circpad_send_command_to_hop(circ, target_hopnum,
+                                     RELAY_COMMAND_PADDING_NEGOTIATE,
+                                     cell.payload, encoded_len);
+}
+
+/**
+ * Send a padding negotiated response back towards the origin.
+ *
+ * Builds a PADDING_NEGOTIATED payload carrying <b>command</b>,
+ * <b>response</b> and <b>machine</b>, and sends it on <b>circ</b>.
+ *
+ * Returns 1 if the response was encoded and sent successfully, 0
+ * otherwise.
+ */
+bool
+circpad_padding_negotiated(circuit_t *circ,
+                           circpad_machine_num_t machine,
+                           uint8_t command,
+                           uint8_t response)
+{
+  circpad_negotiated_t reply;
+  cell_t cell;
+  ssize_t encoded_len;
+
+  memset(&reply, 0, sizeof(circpad_negotiated_t));
+  memset(&cell, 0, sizeof(cell_t));
+  // This gets reset to RELAY_EARLY appropriately by
+  // relay_send_command_from_edge_. At least, it looks that way.
+  // QQQ-MP-AP: Verify that.
+  cell.command = CELL_RELAY;
+
+  circpad_negotiated_set_command(&reply, command);
+  circpad_negotiated_set_response(&reply, response);
+  circpad_negotiated_set_version(&reply, 0);
+  circpad_negotiated_set_machine_type(&reply, machine);
+
+  encoded_len = circpad_negotiated_encode(cell.payload, CELL_PAYLOAD_SIZE,
+                                          &reply);
+  if (encoded_len < 0)
+    return 0;
+
+  /* Use relay_send because we're from the middle to the origin. We don't
+   * need to specify a target hop or layer_hint. */
+  return relay_send_command_from_edge(0, circ,
+                                      RELAY_COMMAND_PADDING_NEGOTIATED,
+                                      (void*)cell.payload,
+                                      (size_t)encoded_len, NULL) == 0;
+}
+
+/**
+ * Parse and react to a padding_negotiate cell.
+ *
+ * This is called at the middle node upon receipt of the client's choice of
+ * state machine: START sets up the requested relay machine if it is
+ * available, STOP frees it. The outcome is sent back in a negotiated cell.
+ * Returns -1 on error, 0 on success.
+ */
+signed_error_t
+circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell)
+{
+  int retval = -1; /* Stays an error unless a command below succeeds. */
+  circpad_negotiate_t *negotiate;
+
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiate cell unsupported at origin.");
+    return -1;
+  }
+
+  if (circpad_negotiate_parse(&negotiate, cell->payload+RELAY_HEADER_SIZE,
+                               CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) {
+    log_fn(LOG_WARN, LD_CIRC,
+          "Received malformed PADDING_NEGOTIATE cell; dropping.");
+    return -1;
+  }
+
+  if (negotiate->command == CIRCPAD_COMMAND_STOP) {
+    /* Look before freeing: a STOP for a machine we run is a success. */
+    for (int i = 0; i < CIRCPAD_MAX_MACHINES; i++) {
+      if (circ->padding_machine[i] &&
+          circ->padding_machine[i]->machine_num == negotiate->machine_type)
+        retval = 0;
+    }
+    free_circ_machineinfos_with_machine_num(circ, negotiate->machine_type);
+    if (retval != 0)
+      log_fn(LOG_WARN, LD_CIRC,
+            "Received circuit padding stop command for unknown machine.");
+  } else if (negotiate->command == CIRCPAD_COMMAND_START) {
+    SMARTLIST_FOREACH_BEGIN(relay_padding_machines,
+                            const circpad_machine_spec_t *, m) {
+      if (m->machine_num == negotiate->machine_type) {
+        circpad_setup_machine_on_circ(circ, m);
+        retval = 0;
+        break;
+      }
+    } SMARTLIST_FOREACH_END(m);
+  }
+
+  circpad_padding_negotiated(circ, negotiate->machine_type,
+                 negotiate->command,
+                 (retval == 0) ? CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR);
+  circpad_negotiate_free(negotiate);
+  return retval;
+}
+
+/**
+ * Handle a padding_negotiated cell at the origin.
+ *
+ * The cell is the middle's response to our choice of state machine;
+ * <b>layer_hint</b> is checked against the hop we expect it to come from.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+signed_error_t
+circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell,
+                                  crypt_path_t *layer_hint)
+{
+  circpad_negotiated_t *reply;
+  bool is_stop, start_refused;
+
+  if (!CIRCUIT_IS_ORIGIN(circ)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiated cell unsupported at non-origin.");
+    return -1;
+  }
+
+  /* Reject replies that did not come from the expected hop. */
+  if (!circpad_padding_is_from_expected_hop(circ, layer_hint)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiated cell from wrong hop!");
+    return -1;
+  }
+
+  if (circpad_negotiated_parse(&reply, cell->payload+RELAY_HEADER_SIZE,
+                               CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) {
+    log_fn(LOG_WARN, LD_CIRC,
+          "Received malformed PADDING_NEGOTIATED cell; "
+          "dropping.");
+    return -1;
+  }
+
+  is_stop = (reply->command == CIRCPAD_COMMAND_STOP);
+  start_refused = (reply->command == CIRCPAD_COMMAND_START &&
+                   reply->response == CIRCPAD_RESPONSE_ERR);
+
+  /* Both cases drop the machine. On STOP it may already be gone, or a
+   * machine with a different number may have been set up meanwhile; a
+   * refused START can happen due to consensus drift. */
+  if (is_stop || start_refused)
+    free_circ_machineinfos_with_machine_num(circ, reply->machine_type);
+
+  if (start_refused) {
+    TO_ORIGIN_CIRCUIT(circ)->padding_negotiation_failed = 1;
+    log_fn(LOG_INFO, LD_CIRC,
+           "Middle node did not accept our padding request.");
+  }
+
+  circpad_negotiated_free(reply);
+  return 0;
+}
+
+/* Serialization. Disabled: uses fields the structs in the header lack. */
+// TODO: Should we use keyword=value here? Are there helpers for that?
+#if 0
+static void
+circpad_state_serialize(const circpad_state_t *state,
+                        smartlist_t *chunks)
+{
+  smartlist_add_asprintf(chunks, " %u", state->histogram[0]);
+  for (int i = 1; i < state->histogram_len; i++) {
+    smartlist_add_asprintf(chunks, ",%u",
+                           state->histogram[i]);
+  }
+
+  smartlist_add_asprintf(chunks, " 0x%x",
+                         state->transition_cancel_events);
+
+  for (int i = 0; i < CIRCPAD_NUM_STATES; i++) {
+    smartlist_add_asprintf(chunks, ",0x%x",
+                           state->transition_events[i]);
+  }
+
+  smartlist_add_asprintf(chunks, " %u %u",
+                         state->use_rtt_estimate,
+                         state->token_removal);
+}
+
+char *
+circpad_machine_spec_to_string(const circpad_machine_spec_t *machine)
+{
+  smartlist_t *chunks = smartlist_new();
+  char *out;
+  (void)machine;
+
+  circpad_state_serialize(&machine->start, chunks);
+  circpad_state_serialize(&machine->gap, chunks);
+  circpad_state_serialize(&machine->burst, chunks);
+
+  out = smartlist_join_strings(chunks, "", 0, NULL);
+
+  SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
+  smartlist_free(chunks);
+  return out;
+}
+
+// XXX: Writeme (placeholder: ignores its input and returns NULL)
+const circpad_machine_spec_t *
+circpad_string_to_machine(const char *str)
+{
+  (void)str;
+  return NULL;
+}
+
+#endif
diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h
new file mode 100644
index 0000000000..628f27ec11
--- /dev/null
+++ b/src/core/or/circuitpadding.h
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2017, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file circuitpadding.h
+ * \brief Header file for circuitpadding.c.
+ **/
+
+#ifndef TOR_CIRCUITPADDING_H
+#define TOR_CIRCUITPADDING_H
+
+#include "src/trunnel/circpad_negotiation.h"
+#include "lib/evloop/timers.h"
+
+struct circuit_t;
+struct origin_circuit_t;
+struct cell_t;
+
+/**
+ * Signed error return with the specific property that negative
+ * values mean error codes of various semantics, 0 means success,
+ * and positive values are unused.
+ *
+ * XXX: Tor uses this concept a lot but just calls it int. Should we move
+ * this somewhere centralized? Where?
+ */
+typedef int signed_error_t;
+
+/**
+ * These constants specify the types of events that can cause
+ * transitions between state machine states.
+ *
+ * Note that SENT and RECV are relative to this endpoint. For
+ * relays, SENT means packets destined towards the client and
+ * RECV means packets destined towards the relay. On the client,
+ * SENT means packets destined towards the relay, whereas RECV
+ * means packets destined towards the client.
+ */
+typedef enum {
+  /* A non-padding cell was received. */
+  CIRCPAD_EVENT_NONPADDING_RECV = 0,
+  /* A non-padding cell was sent. */
+  CIRCPAD_EVENT_NONPADDING_SENT = 1,
+  /* A padding cell (RELAY_COMMAND_DROP) was sent. */
+  CIRCPAD_EVENT_PADDING_SENT = 2,
+  /* A padding cell was received. */
+  CIRCPAD_EVENT_PADDING_RECV = 3,
+  /* We tried to schedule padding but we ended up picking the infinity bin
+   * which means that padding was delayed infinitely */
+  CIRCPAD_EVENT_INFINITY = 4,
+  /* All histogram bins are empty (we are out of tokens) */
+  CIRCPAD_EVENT_BINS_EMPTY = 5,
+  /* A real event (see CIRCPAD_NUM_EVENTS); presumably "length used up" */
+  CIRCPAD_EVENT_LENGTH_COUNT = 6
+} circpad_event_t;
+#define CIRCPAD_NUM_EVENTS ((int)CIRCPAD_EVENT_LENGTH_COUNT+1)
+
+/** Boolean type that says if we decided to transition states or not */
+typedef enum {
+  CIRCPAD_STATE_UNCHANGED = 0,
+  CIRCPAD_STATE_CHANGED = 1
+} circpad_decision_t;
+
+/** The type for the things in histogram bins (aka tokens) */
+typedef uint32_t circpad_hist_token_t;
+
+/** The type for histogram indexes (needs to be negative for errors) */
+typedef int8_t circpad_hist_index_t;
+
+/** The type for absolute time, from monotime_absolute_usec() */
+typedef uint64_t circpad_time_t;
+
+/** The type for timer delays, in microseconds */
+typedef uint32_t circpad_delay_t;
+
+/**
+ * An infinite padding cell delay means don't schedule any padding --
+ * simply wait until a different event triggers a transition.
+ *
+ * This means that the maximum delay we can schedule is UINT32_MAX-1
+ * microseconds, or about 4295 seconds (roughly 1.2 hours).
+ * XXX: Is this enough if we want to simulate light, intermittent
+ * activity on an onion service?
+ */
+#define CIRCPAD_DELAY_INFINITE  (UINT32_MAX)
+
+/**
+ * Macro to clarify when we're checking the infinity bin.
+ *
+ * Works with either circpad_state_t or circpad_machine_state_t
+ */
+#define CIRCPAD_INFINITY_BIN(mi)  ((mi)->histogram_len-1)
+
+/**
+ * These constants form a bitfield that specifies when a state machine
+ * should be applied to a circuit.
+ *
+ * If any of these elements is set, then the circuit will be tested against
+ * that specific condition. If an element is unset, then we don't test it.
+ * (E.g. If neither NO_STREAMS or STREAMS are set, then we will not care
+ * whether a circuit has streams attached when we apply a state machine)
+ *
+ * The helper function circpad_circuit_state() converts circuit state
+ * flags into this more compact representation.
+ */
+typedef enum {
+  /* Only apply machine if the circuit is still building */
+  CIRCPAD_CIRC_BUILDING = 1<<0,
+  /* Only apply machine if the circuit is open */
+  CIRCPAD_CIRC_OPENED = 1<<1,
+  /* Only apply machine if the circuit has no attached streams */
+  CIRCPAD_CIRC_NO_STREAMS = 1<<2,
+  /* Only apply machine if the circuit has attached streams */
+  CIRCPAD_CIRC_STREAMS = 1<<3,
+  /* Only apply machine if the circuit still allows RELAY_EARLY cells */
+  CIRCPAD_CIRC_HAS_RELAY_EARLY = 1<<4,
+  /* Only apply machine if the circuit has depleted its RELAY_EARLY cells
+   * allowance. */
+  CIRCPAD_CIRC_HAS_NO_RELAY_EARLY = 1<<5
+} circpad_circuit_state_t;
+
+/** Bitmask that says "apply this machine to all states" */
+#define CIRCPAD_STATE_ALL   \
+    (CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED| \
+     CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_NO_STREAMS| \
+     CIRCPAD_CIRC_HAS_RELAY_EARLY|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY)
+
+/**
+ * A compact circuit purpose bitfield mask that allows us to compactly
+ * specify which circuit purposes a machine should apply to.
+ *
+ * The helper function circpad_circ_purpose_to_mask() converts circuit
+ * purposes into bit positions in this bitmask.
+ */
+typedef uint32_t circpad_purpose_mask_t;
+
+/** Bitmask that says "apply this machine to all purposes". */
+#define CIRCPAD_PURPOSE_ALL (0xFFFFFFFF)
+
+/**
+ * This type specifies all of the conditions that must be met before
+ * a client decides to initiate padding on a circuit.
+ *
+ * A circuit must satisfy every sub-field in this type in order
+ * to be considered to match the conditions.
+ */
+typedef struct circpad_machine_conditions_t {
+  /** Only apply the machine *if* the circuit has at least this many hops */
+  unsigned min_hops : 3;
+
+  /** Only apply the machine *if* vanguards are enabled */
+  unsigned requires_vanguards : 1;
+
+  /** Only apply the machine *if* the circuit's state matches any of
+   *  the bits set in this bitmask. */
+  circpad_circuit_state_t state_mask;
+
+  /** Only apply a machine *if* the circuit's purpose matches one
+   *  of the bits set in this bitmask */
+  circpad_purpose_mask_t purpose_mask;
+
+} circpad_machine_conditions_t;
+
+/**
+ * Token removal strategy options.
+ *
+ * The WTF-PAD histograms are meant to specify a target distribution to shape
+ * traffic towards. This is accomplished by removing tokens from the histogram
+ * when either padding or non-padding cells are sent.
+ *
+ * When we see a non-padding cell at a particular time since the last cell, you
+ * remove a token from the corresponding delay bin. These flags specify
+ * which bin to choose if that bin is already empty.
+ */
+typedef enum {
+  /** Don't remove any tokens */
+  CIRCPAD_TOKEN_REMOVAL_NONE = 0,
+  /**
+   * Remove from the first non-zero higher bin index when current is zero.
+   * This is the recommended strategy from the Adaptive Padding paper. */
+  CIRCPAD_TOKEN_REMOVAL_HIGHER = 1,
+  /** Remove from the first non-zero lower bin index when current is empty. */
+  CIRCPAD_TOKEN_REMOVAL_LOWER = 2,
+  /** Remove from the closest non-zero bin index when current is empty. */
+  CIRCPAD_TOKEN_REMOVAL_CLOSEST = 3,
+  /** Remove from the closest bin by time value (since bins are
+   *  exponentially spaced). */
+  CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC = 4,
+  /** Only remove from the exact bin corresponding to this delay. If
+   *  the bin is 0, simply do nothing. Don't pick another bin. */
+  CIRCPAD_TOKEN_REMOVAL_EXACT = 5
+} circpad_removal_t;
+
+/**
+ * Distribution types supported by circpad_distribution_sample().
+ *
+ * These can be used instead of histograms for the inter-packet
+ * timing distribution, or to specify a distribution on the number
+ * of cells that can be sent while in a specific state of the state
+ * machine. */
+typedef enum {
+  CIRCPAD_DIST_NONE = 0,
+  CIRCPAD_DIST_UNIFORM = 1,
+  CIRCPAD_DIST_LOGISTIC = 2,
+  CIRCPAD_DIST_LOG_LOGISTIC = 3,
+  CIRCPAD_DIST_GEOMETRIC = 4,
+  CIRCPAD_DIST_WEIBULL = 5,
+  CIRCPAD_DIST_PARETO = 6
+} circpad_distribution_type_t;
+
+/**
+ * Distribution information.
+ *
+ * This type specifies a specific distribution above, as well as
+ * up to two parameters for that distribution. The specific
+ * per-distribution meaning of these parameters is specified
+ * in circpad_distribution_sample().
+ */
+typedef struct circpad_distribution_t {
+  circpad_distribution_type_t type;
+  double param1;
+  double param2;
+} circpad_distribution_t;
+
+/** State number type. Represents current state of state machine. */
+typedef uint16_t circpad_statenum_t;
+#define  CIRCPAD_STATENUM_MAX   (UINT16_MAX)
+
+/** A histogram is used to sample padding delays given a machine state.  This
+ *  constant defines the maximum histogram width (i.e. the max number of bins)
+ *
+ *  Each histogram bin is twice as large as the previous. Two exceptions: The
+ *  first bin has zero width (which means that minimum delay is applied to the
+ *  next padding cell), and the last bin (infinity bin) has infinite width
+ *  (which means that the next padding cell will be delayed infinitely). */
+#define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1)
+
+/**
+ * A state of a padding state machine. The information here are immutable and
+ * represent the initial form of the state; it does not get updated as things
+ * happen. The mutable information that gets updated in runtime are carried in
+ * a circpad_machine_state_t.
+ *
+ * This struct describes the histograms and parameters of a single
+ * state in the adaptive padding machine. Instances of this struct
+ * exist in global circpad machine definitions that come from torrc
+ * or the consensus.
+ */
+typedef struct circpad_state_t {
+  /** If a histogram is used for this state, this specifies the number of bins
+   *  of this histogram. Histograms must have at least 2 bins.
+   *
+   *  If a delay probability distribution is used for this state, this is set
+   *  to 0. */
+  circpad_hist_index_t histogram_len;
+  /** The histogram itself: an array of per-bin token counts, whose bin
+   *  widths are exponentially spaced, in microseconds */
+  circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN];
+  /** Total number of tokens in this histogram. This is a constant and is *not*
+   *  decremented every time we spend a token. It's used for initializing and
+   *  refilling the histogram. */
+  uint32_t histogram_total_tokens;
+
+  /** Minimum padding delay of this state in microseconds.
+   *
+   *  If histograms are used, this is the left (and right) bound of the first
+   *  bin (since it has zero width).
+   *
+   *  If a delay probability distribution is used, this represents the minimum
+   *  delay we can sample from the distribution.
+   */
+  circpad_delay_t start_usec;
+
+  /** If histograms are used, this is the width of the whole histogram in
+   *  microseconds, and it's used to calculate individual bin width.
+   *
+   *  If a delay probability distribution is used, this is used as the max
+   *  delay we can sample from the distribution.
+   */
+  circpad_delay_t range_usec;
+
+  /**
+   * Represents a delay probability distribution (aka IAT distribution). It's a
+   * parametrized way of encoding inter-packet delay information in
+   * microseconds. It can be used instead of histograms.
+   *
+   * If it is used, token_removal below must be set to
+   * CIRCPAD_TOKEN_REMOVAL_NONE.
+   *
+   * start_usec, range_usec, and rtt estimates are still applied to the
+   * results of sampling from this distribution (range_usec is used as a max).
+   */
+  circpad_distribution_t iat_dist;
+
+  /**
+   * The length dist is a parameterized way of encoding how long this
+   * state machine runs in terms of sent padding cells or all
+   * sent cells. Values are sampled from this distribution, clamped
+   * to max_length, and then start_length is added to that value.
+   *
+   * It may be specified instead of or in addition to
+   * the infinity bins and bins empty conditions. */
+  circpad_distribution_t length_dist;
+  /** A minimum length value, added to the output of length_dist */
+  uint16_t start_length;
+  /** A cap on the length value that can be sampled from the length_dist */
+  uint64_t max_length;
+
+  /** Should we decrement length when we see a nonpadding packet?
+   * XXX: Are there any machines that actually want to set this to 0? There may
+   * not be. OTOH, it's only a bit.. */
+  unsigned length_includes_nonpadding : 1;
+
+  /**
+   * This is an array that specifies the next state to transition to upon
+   * receipt of an event matching the indicated array index.
+   *
+   * This aborts our scheduled packet and switches to the state
+   * corresponding to the index of the array. Tokens are filled upon
+   * this transition.
+   *
+   * States are allowed to transition to themselves, which means re-schedule
+   * a new padding timer. They are also allowed to temporarily "transition"
+   * to the "IGNORE" and "CANCEL" pseudo-states. See #defines below
+   * for details on state behavior and meaning.
+   */
+  circpad_statenum_t next_state[CIRCPAD_NUM_EVENTS];
+
+  /**
+   * If true, estimate the RTT from this relay to the exit/website and add that
+   * to start_usec for use as the histogram bin 0 start delay.
+   *
+   * Right now this is only supported for relay-side state machines.
+   */
+  unsigned use_rtt_estimate : 1;
+
+  /** This specifies the token removal strategy to use upon padding and
+   *  non-padding activity. */
+  circpad_removal_t token_removal;
+} circpad_state_t;
+
+/**
+ * The start state for this machine.
+ *
+ * In the original WTF-PAD, this is only used for transition to/from
+ * the burst state. All other fields are not used. But to simplify the
+ * code we've made it a first-class state. This has no performance
+ * consequences, but may make naive serialization of the state machine
+ * large, if we're not careful about how we represent empty fields.
+ */
+#define  CIRCPAD_STATE_START       0
+
+/**
+ * The burst state for this machine.
+ *
+ * In the original Adaptive Padding algorithm and in WTF-PAD
+ * (https://www.freehaven.net/anonbib/cache/ShWa-Timing06.pdf and
+ * https://www.cs.kau.se/pulls/hot/thebasketcase-wtfpad/), the burst
+ * state serves to detect bursts in traffic. This is done by using longer
+ * delays in its histogram, which represent the expected delays between
+ * bursts of packets in the target stream. If this delay expires without a
+ * real packet being sent, the burst state sends a padding packet and then
+ * immediately transitions to the gap state, which is used to generate
+ * a synthetic padding packet train. In this implementation, this transition
+ * needs to be explicitly specified in the burst state's transition events.
+ *
+ * Because of this flexibility, other padding mechanisms can transition
+ * between these two states arbitrarily, to encode other dynamics of
+ * target traffic.
+ */
+#define  CIRCPAD_STATE_BURST       1
+
+/**
+ * The gap state for this machine.
+ *
+ * In the original Adaptive Padding algorithm and in WTF-PAD, the gap
+ * state serves to simulate an artificial packet train composed of padding
+ * packets. It does this by specifying much lower inter-packet delays than
+ * the burst state, and transitioning back to itself after padding is sent
+ * if these timers expire before real traffic is sent. If real traffic is
+ * sent, it transitions back to the burst state.
+ *
+ * Again, in this implementation, these transitions must be specified
+ * explicitly, and other transitions are also permitted.
+ */
+#define  CIRCPAD_STATE_GAP         2
+
+/**
+ * End is a pseudo-state that causes the machine to go completely
+ * idle, and optionally get torn down (depending on the
+ * value of circpad_machine_spec_t.should_negotiate_end)
+ *
+ * End MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_END         CIRCPAD_STATENUM_MAX
+
+/**
+ * "Ignore" is a pseudo-state that means "do not react to this
+ * event".
+ *
+ * "Ignore" MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_IGNORE         (CIRCPAD_STATENUM_MAX-1)
+
+/**
+ * "Cancel" is a pseudo-state that means "cancel pending timers,
+ * but remain in your current state".
+ *
+ * Cancel MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_CANCEL         (CIRCPAD_STATENUM_MAX-2)
+
+/**
+ * Since we have 3 pseudo-states, the max state array length is
+ * up to one less than cancel's statenum.
+ */
+#define CIRCPAD_MAX_MACHINE_STATES  (CIRCPAD_STATE_CANCEL-1)
+
+/**
+ * Mutable padding machine info.
+ *
+ * This structure contains mutable information about a padding
+ * machine. The mutable information must be kept separate because
+ * it exists per-circuit, whereas the machines themselves are global.
+ * This separation is done to conserve space in the circuit structure.
+ *
+ * This is the per-circuit state that changes regarding the global state
+ * machine. Some parts of it are optional (ie NULL).
+ *
+ * XXX: Play with layout to minimize space on x64 Linux (most common relay).
+ */
+typedef struct circpad_machine_state_t {
+  /** The callback pointer for the padding callbacks.
+   *
+   *  These timers stick around the machineinfo until the machineinfo's circuit
+   *  is closed, at which point the timer is cancelled. For this reason it's
+   *  safe to assume that the machineinfo exists if this timer gets
+   *  triggered. */
+  tor_timer_t *padding_timer;
+
+  /** The circuit for this machine */
+  struct circuit_t *on_circ;
+
+  /** A mutable copy of the histogram for the current state.
+   *  Presumably NULL when the state's token_removal is NONE (confirm) */
+  circpad_hist_token_t *histogram;
+  /** Length of the above histogram.
+   * XXX: This field *could* be removed at the expense of added
+   * complexity+overhead for reaching back into the immutable machine
+   * state every time we need to inspect the histogram. It's only a byte,
+   * though, so it seemed worth it.
+   */
+  circpad_hist_index_t histogram_len;
+  /** Remove token from this index upon sending padding */
+  circpad_hist_index_t chosen_bin;
+
+  /** Stop padding/transition if this many cells sent */
+  uint64_t state_length;
+#define CIRCPAD_STATE_LENGTH_INFINITE UINT64_MAX
+
+  /** A scaled count of padding packets sent, used to limit padding overhead.
+   * When this reaches UINT16_MAX, we cut it and nonpadding_sent in half. */
+  uint16_t padding_sent;
+  /** A scaled count of non-padding packets sent, used to limit padding
+   *  overhead. When this reaches UINT16_MAX, we cut it and padding_sent in
+   *  half. */
+  uint16_t nonpadding_sent;
+
+  /**
+   * EWMA estimate of the RTT of the circuit from this hop
+   * to the exit end, in microseconds. */
+  circpad_delay_t rtt_estimate_usec;
+
+  /**
+   * The last time we got an event relevant to estimating
+   * the RTT. Monotonic time in microseconds since system
+   * start.
+   */
+  circpad_time_t last_received_time_usec;
+
+  /**
+   * The time at which we scheduled a non-padding packet,
+   * or selected an infinite delay.
+   *
+   * Monotonic time in microseconds since system start.
+   * This is 0 if we haven't chosen a padding delay.
+   */
+  circpad_time_t padding_scheduled_at_usec;
+
+  /** What state is this machine in? */
+  circpad_statenum_t current_state;
+
+  /**
+   * True if we have scheduled a timer for padding.
+   *
+   * This is 1 if a timer is pending. It is 0 if
+   * no timer is scheduled. (It can be 0 even when
+   * padding_scheduled_at_usec is non-zero).
+   */
+  unsigned is_padding_timer_scheduled : 1;
+
+  /**
+   * If this is true, we have seen full duplex behavior.
+   * Stop updating the RTT.
+   */
+  unsigned stop_rtt_update : 1;
+
+/** Max number of padding machines on each circuit. If changed,
+ * also ensure the machine_index bitwidth supports the new size. */
+#define CIRCPAD_MAX_MACHINES    (2)
+  /** Which padding machine index was this for.
+   * (make sure changes to the bitwidth can support the
+   * CIRCPAD_MAX_MACHINES define). */
+  unsigned machine_index : 1;
+
+} circpad_machine_state_t;
+
+/** Helper macro to get an actual state machine from a machineinfo */
+#define CIRCPAD_GET_MACHINE(machineinfo) \
+    ((machineinfo)->on_circ->padding_machine[(machineinfo)->machine_index])
+
+/**
+ * This specifies a particular padding machine to use after negotiation.
+ *
+ * The constants for machine_num_t are in trunnel.
+ * We want to be able to define extra numbers in the consensus/torrc, though.
+ */
+typedef uint8_t circpad_machine_num_t;
+
+/** Global state machine structure from the consensus */
+typedef struct circpad_machine_spec_t {
+  /** Global machine number */
+  circpad_machine_num_t machine_num;
+
+  /** Which machine index slot should this machine go into in
+   *  the array on the circuit_t */
+  unsigned machine_index : 1;
+
+  /** Send a padding negotiate to shut down machine at end state? */
+  unsigned should_negotiate_end : 1;
+
+  // These next three fields are origin machine-only...
+  /** Origin side or relay side */
+  unsigned is_origin_side : 1;
+
+  /** Which hop in the circuit should we send padding to/from?
+   *  1-indexed (ie: hop #1 is guard, #2 middle, #3 exit). */
+  unsigned target_hopnum : 3;
+
+  /** This machine is only applied if the following conditions are met. */
+  circpad_machine_conditions_t conditions;
+
+  /** How many padding cells can be sent before we apply overhead limits?
+   * XXX: Note that we can only allow up to 64k of padding cells on an
+   * otherwise quiet circuit. Is this enough? It's 33MB. */
+  uint16_t allowed_padding_count;
+
+  /** Padding percent cap: Stop padding if we exceed this percent overhead.
+   * 0 means no limit. Overhead is defined as percent of total traffic, so
+   * that we can use 0..100 here. This is the same definition as used in
+   * Prop#265. */
+  uint8_t max_padding_percent;
+
+  /** State array: indexed by circpad_statenum_t */
+  circpad_state_t *states;
+
+  /**
+   * Number of states this machine has (ie: length of the states array).
+   * XXX: This field is not needed other than for safety. */
+  circpad_statenum_t num_states;
+} circpad_machine_spec_t;
+
+void circpad_new_consensus_params(const networkstatus_t *ns);
+
+/**
+ * The following are event call-in points that are of interest to
+ * the state machines. They are called during cell processing. */
+void circpad_deliver_unrecognized_cell_events(struct circuit_t *circ,
+                                              cell_direction_t dir);
+void circpad_deliver_sent_relay_cell_events(struct circuit_t *circ,
+                                            uint8_t relay_command);
+void circpad_deliver_recognized_relay_cell_events(struct circuit_t *circ,
+                                                  uint8_t relay_command,
+                                                  crypt_path_t *layer_hint);
+
+/** Cell events are delivered by the above delivery functions */
+void circpad_cell_event_nonpadding_sent(struct circuit_t *on_circ);
+void circpad_cell_event_nonpadding_received(struct circuit_t *on_circ);
+void circpad_cell_event_padding_sent(struct circuit_t *on_circ);
+void circpad_cell_event_padding_received(struct circuit_t *on_circ);
+
+/** Internal events are events the machines send to themselves */
+circpad_decision_t
+circpad_internal_event_infinity(circpad_machine_state_t *mi);
+circpad_decision_t
+circpad_internal_event_bins_empty(circpad_machine_state_t *);
+circpad_decision_t circpad_internal_event_state_length_up(
+                                  circpad_machine_state_t *);
+
+/** Machine creation events are events that cause us to set up or
+ *  tear down padding state machines. */
+void circpad_machine_event_circ_added_hop(struct origin_circuit_t *on_circ);
+void circpad_machine_event_circ_built(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_purpose_changed(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_has_streams(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_has_no_streams(struct origin_circuit_t *circ);
+void
+circpad_machine_event_circ_has_no_relay_early(struct origin_circuit_t *circ);
+
+void circpad_machines_init(void);
+void circpad_machines_free(void);
+
+void circpad_machine_states_init(circpad_machine_spec_t *machine,
+                                 circpad_statenum_t num_states);
+
+void circpad_circuit_free_all_machineinfos(struct circuit_t *circ);
+
+bool circpad_padding_is_from_expected_hop(struct circuit_t *circ,
+                                         crypt_path_t *from_hop);
+
+/** Serialization to/from torrc and consensus (bodies are #if 0'd out) */
+char *circpad_machine_spec_to_string(const circpad_machine_spec_t *machine);
+const circpad_machine_spec_t *circpad_string_to_machine(const char *str);
+
+/* Padding negotiation between client and middle */
+signed_error_t circpad_handle_padding_negotiate(struct circuit_t *circ,
+                                      struct cell_t *cell);
+signed_error_t circpad_handle_padding_negotiated(struct circuit_t *circ,
+                                      struct cell_t *cell,
+                                      crypt_path_t *layer_hint);
+signed_error_t circpad_negotiate_padding(struct origin_circuit_t *circ,
+                          circpad_machine_num_t machine,
+                          uint8_t target_hopnum,
+                          uint8_t command);
+bool circpad_padding_negotiated(struct circuit_t *circ,
+                           circpad_machine_num_t machine,
+                           uint8_t command,
+                           uint8_t response);
+
+MOCK_DECL(circpad_decision_t,
+circpad_machine_schedule_padding,(circpad_machine_state_t *));
+
+MOCK_DECL(circpad_decision_t,
+circpad_machine_spec_transition, (circpad_machine_state_t *mi,
+                             circpad_event_t event));
+
+circpad_decision_t circpad_send_padding_cell_for_callback(
+                                 circpad_machine_state_t *mi);
+
+#ifdef CIRCUITPADDING_PRIVATE
+STATIC circpad_delay_t
+circpad_machine_sample_delay(circpad_machine_state_t *mi);
+
+STATIC bool
+circpad_machine_reached_padding_limit(circpad_machine_state_t *mi);
+
+STATIC
+circpad_decision_t circpad_machine_remove_token(circpad_machine_state_t *mi);
+
+STATIC circpad_delay_t
+circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin);
+
+STATIC const circpad_state_t *
+circpad_machine_current_state(const circpad_machine_state_t *mi);
+
+STATIC circpad_hist_index_t circpad_histogram_usec_to_bin(
+                                       const circpad_machine_state_t *mi,
+                                       circpad_delay_t us);
+
+STATIC circpad_machine_state_t *circpad_circuit_machineinfo_new(
+                                               struct circuit_t *on_circ,
+                                               int machine_index);
+STATIC void circpad_machine_remove_higher_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us);
+STATIC void circpad_machine_remove_lower_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us);
+STATIC void circpad_machine_remove_closest_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us,
+                                         bool use_usec);
+STATIC void circpad_machine_setup_tokens(circpad_machine_state_t *mi);
+
+MOCK_DECL(STATIC signed_error_t,
+circpad_send_command_to_hop,(struct origin_circuit_t *circ, uint8_t hopnum,
+                             uint8_t relay_command, const uint8_t *payload,
+                             ssize_t payload_len));
+
+#ifdef TOR_UNIT_TESTS
+extern smartlist_t *origin_padding_machines;
+extern smartlist_t *relay_padding_machines;
+#endif
+
+#endif
+
+#endif
diff --git a/src/core/or/circuituse.c b/src/core/or/circuituse.c
index b7a4ab1b9e..70e3e97ff7 100644
--- a/src/core/or/circuituse.c
+++ b/src/core/or/circuituse.c
@@ -35,6 +35,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/connection_edge.h"
 #include "core/or/policies.h"
 #include "feature/client/addressmap.h"
@@ -1419,6 +1420,11 @@ circuit_detach_stream(circuit_t *circ, edge_connection_t *conn)
       if (circ->purpose == CIRCUIT_PURPOSE_S_REND_JOINED) {
         hs_dec_rdv_stream_counter(origin_circ);
       }
+
+      /* If there are no more streams on this circ, tell circpad */
+      if (!origin_circ->p_streams)
+        circpad_machine_event_circ_has_no_streams(origin_circ);
+
       return;
     }
   } else {
@@ -2586,6 +2592,12 @@ link_apconn_to_circ(entry_connection_t *apconn, origin_circuit_t *circ,
   /* add it into the linked list of streams on this circuit */
   log_debug(LD_APP|LD_CIRC, "attaching new conn to circ. n_circ_id %u.",
             (unsigned)circ->base_.n_circ_id);
+
+  /* If this is the first stream on this circuit, tell circpad
+   * that streams are attached */
+  if (!circ->p_streams)
+    circpad_machine_event_circ_has_streams(circ);
+
   /* reset it, so we can measure circ timeouts */
   ENTRY_TO_CONN(apconn)->timestamp_last_read_allowed = time(NULL);
   ENTRY_TO_EDGE_CONN(apconn)->next_stream = circ->p_streams;
@@ -3064,6 +3076,8 @@ circuit_change_purpose(circuit_t *circ, uint8_t new_purpose)
   if (CIRCUIT_IS_ORIGIN(circ)) {
     control_event_circuit_purpose_changed(TO_ORIGIN_CIRCUIT(circ),
                                           old_purpose);
+
+    circpad_machine_event_circ_purpose_changed(TO_ORIGIN_CIRCUIT(circ));
   }
 }
 
diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c
index 93383a4e01..6b9ed0f211 100644
--- a/src/core/or/connection_edge.c
+++ b/src/core/or/connection_edge.c
@@ -67,6 +67,7 @@
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_or.h"
 #include "core/or/policies.h"
@@ -3712,6 +3713,10 @@ handle_hs_exit_conn(circuit_t *circ, edge_connection_t *conn)
   /* Link the circuit and the connection crypt path. */
   conn->cpath_layer = origin_circ->cpath->prev;
 
+  /* If this is the first stream on this circuit, tell circpad */
+  if (!origin_circ->p_streams)
+    circpad_machine_event_circ_has_streams(origin_circ);
+
   /* Add it into the linked list of p_streams on this circuit */
   conn->next_stream = origin_circ->p_streams;
   origin_circ->p_streams = conn;
diff --git a/src/core/or/or.h b/src/core/or/or.h
index ca373d8ed5..bf5e3957ad 100644
--- a/src/core/or/or.h
+++ b/src/core/or/or.h
@@ -207,6 +207,9 @@ struct curve25519_public_key_t;
 #define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39
 #define RELAY_COMMAND_INTRODUCE_ACK 40
 
+#define RELAY_COMMAND_PADDING_NEGOTIATE 41
+#define RELAY_COMMAND_PADDING_NEGOTIATED 42
+
 /* Reasons why an OR connection is closed. */
 #define END_OR_CONN_REASON_DONE           1
 #define END_OR_CONN_REASON_REFUSED        2 /* connection refused */
@@ -836,6 +839,10 @@ typedef struct protover_summary_flags_t {
    * service rendezvous point supporting version 3 as seen in proposal 224.
    * This requires HSRend=2. */
   unsigned int supports_v3_rendezvous_point: 1;
+
+  /** True iff this router has a protocol list that allows clients to
+   * negotiate link-level padding. Requires Padding>=1. */
+  unsigned int supports_padding : 1;
 } protover_summary_flags_t;
 
 typedef struct routerinfo_t routerinfo_t;
diff --git a/src/core/or/origin_circuit_st.h b/src/core/or/origin_circuit_st.h
index 26cdf590f1..921076c1b9 100644
--- a/src/core/or/origin_circuit_st.h
+++ b/src/core/or/origin_circuit_st.h
@@ -161,6 +161,10 @@ struct origin_circuit_t {
    * connections to this circuit. */
   unsigned int unusable_for_new_conns : 1;
 
+  /* If this flag is set (due to padding negotiation failure), we should
+   * not try to negotiate further circuit padding. */
+  unsigned padding_negotiation_failed : 1;
+
   /**
    * Tristate variable to guard against pathbias miscounting
    * due to circuit purpose transitions changing the decision
diff --git a/src/core/or/protover.c b/src/core/or/protover.c
index e80fbfae81..c0c09c9d17 100644
--- a/src/core/or/protover.c
+++ b/src/core/or/protover.c
@@ -39,6 +39,9 @@ static int protocol_list_contains(const smartlist_t *protos,
 static const struct {
   protocol_type_t protover_type;
   const char *name;
+/* If you add a new protocol here, you probably also want to add
+ * parsing for it in routerstatus_parse_entry_from_string() so that
+ * it is set in routerstatus_t */
 } PROTOCOL_NAMES[] = {
   { PRT_LINK, "Link" },
   { PRT_LINKAUTH, "LinkAuth" },
@@ -49,6 +52,7 @@ static const struct {
   { PRT_HSREND, "HSRend" },
   { PRT_DESC, "Desc" },
   { PRT_MICRODESC, "Microdesc"},
+  { PRT_PADDING, "Padding"},
   { PRT_CONS, "Cons" }
 };
 
@@ -396,7 +400,8 @@ protover_get_supported_protocols(void)
     "LinkAuth=3 "
 #endif
     "Microdesc=1-2 "
-    "Relay=1-2";
+    "Relay=1-2 "
+    "Padding=1";
 }
 
 /** The protocols from protover_get_supported_protocols(), as parsed into a
diff --git a/src/core/or/protover.h b/src/core/or/protover.h
index 7319d2f8c4..ffd4f2c18e 100644
--- a/src/core/or/protover.h
+++ b/src/core/or/protover.h
@@ -43,6 +43,7 @@ typedef enum protocol_type_t {
   PRT_DESC,
   PRT_MICRODESC,
   PRT_CONS,
+  PRT_PADDING,
 } protocol_type_t;
 
 bool protover_contains_long_protocol_names(const char *s);
diff --git a/src/core/or/relay.c b/src/core/or/relay.c
index 2e92f2a55d..00c2111955 100644
--- a/src/core/or/relay.c
+++ b/src/core/or/relay.c
@@ -55,6 +55,7 @@
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "lib/compress/compress.h"
 #include "app/config/config.h"
 #include "core/mainloop/connection.h"
@@ -80,7 +81,6 @@
 #include "feature/nodelist/describe.h"
 #include "feature/nodelist/routerlist.h"
 #include "core/or/scheduler.h"
-#include "feature/stats/rephist.h"
 
 #include "core/or/cell_st.h"
 #include "core/or/cell_queue_st.h"
@@ -293,7 +293,9 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ,
     return 0;
   }
 
-  /* not recognized. pass it on. */
+  /* not recognized. inform circpad and pass it on. */
+  circpad_deliver_unrecognized_cell_events(circ, cell_direction);
+
   if (cell_direction == CELL_DIRECTION_OUT) {
     cell->circ_id = circ->n_circ_id; /* switch it */
     chan = circ->n_chan;
@@ -353,11 +355,11 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ,
  *  - Encrypt it to the right layer
  *  - Append it to the appropriate cell_queue on <b>circ</b>.
  */
-static int
-circuit_package_relay_cell(cell_t *cell, circuit_t *circ,
+MOCK_IMPL(int,
+circuit_package_relay_cell, (cell_t *cell, circuit_t *circ,
                            cell_direction_t cell_direction,
                            crypt_path_t *layer_hint, streamid_t on_stream,
-                           const char *filename, int lineno)
+                           const char *filename, int lineno))
 {
   channel_t *chan; /* where to send the cell */
 
@@ -524,6 +526,8 @@ relay_command_to_string(uint8_t command)
     case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK";
     case RELAY_COMMAND_EXTEND2: return "EXTEND2";
     case RELAY_COMMAND_EXTENDED2: return "EXTENDED2";
+    case RELAY_COMMAND_PADDING_NEGOTIATE: return "PADDING_NEGOTIATE";
+    case RELAY_COMMAND_PADDING_NEGOTIATED: return "PADDING_NEGOTIATED";
     default:
       tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u",
                    (unsigned)command);
@@ -577,8 +581,8 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
 
-  if (relay_command == RELAY_COMMAND_DROP)
-    rep_hist_padding_count_write(PADDING_TYPE_DROP);
+  /* Tell circpad we're sending a relay cell */
+  circpad_deliver_sent_relay_cell_events(circ, relay_command);
 
   /* If we are sending an END cell and this circuit is used for a tunneled
    * directory request, advance its state. */
@@ -602,7 +606,9 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ,
        * one of them.  Don't worry about the conn protocol version:
        * append_cell_to_circuit_queue will fix it up. */
       cell.command = CELL_RELAY_EARLY;
-      --origin_circ->remaining_relay_early_cells;
+      /* If we're out of relay early cells, tell circpad */
+      if (--origin_circ->remaining_relay_early_cells == 0)
+        circpad_machine_event_circ_has_no_relay_early(origin_circ);
       log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.",
                 (int)origin_circ->remaining_relay_early_cells);
       /* Memorize the command that is sent as RELAY_EARLY cell; helps debug
@@ -1481,9 +1487,11 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
     }
   }
 
+  /* Tell circpad that we've received a recognized cell */
+  circpad_deliver_recognized_relay_cell_events(circ, rh.command, layer_hint);
+
   /* either conn is NULL, in which case we've got a control cell, or else
    * conn points to the recognized stream. */
-
   if (conn && !connection_state_is_open(TO_CONN(conn))) {
     if (conn->base_.type == CONN_TYPE_EXIT &&
         (conn->base_.state == EXIT_CONN_STATE_CONNECTING ||
@@ -1504,8 +1512,14 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
 
   switch (rh.command) {
     case RELAY_COMMAND_DROP:
-      rep_hist_padding_count_read(PADDING_TYPE_DROP);
-//      log_info(domain,"Got a relay-level padding cell. Dropping.");
+      /* Already examined in circpad_deliver_recognized_relay_cell_events */
+      return 0;
+    case RELAY_COMMAND_PADDING_NEGOTIATE:
+      circpad_handle_padding_negotiate(circ, cell);
+      return 0;
+    case RELAY_COMMAND_PADDING_NEGOTIATED:
+      if (circpad_handle_padding_negotiated(circ, cell, layer_hint) == 0)
+        circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length);
       return 0;
     case RELAY_COMMAND_BEGIN:
     case RELAY_COMMAND_BEGIN_DIR:
diff --git a/src/core/or/relay.h b/src/core/or/relay.h
index db7f17b96c..e84727e373 100644
--- a/src/core/or/relay.h
+++ b/src/core/or/relay.h
@@ -78,6 +78,11 @@ void destroy_cell_queue_append(destroy_cell_queue_t *queue,
 void channel_unlink_all_circuits(channel_t *chan, smartlist_t *detached_out);
 MOCK_DECL(int, channel_flush_from_first_active_circuit,
           (channel_t *chan, int max));
+MOCK_DECL(int, circuit_package_relay_cell, (cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno));
+
 void update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction,
                              const char *file, int lineno);
 #define update_circuit_on_cmux(circ, direction) \
diff --git a/src/core/or/versions.c b/src/core/or/versions.c
index 7bd1f5899f..736313a9cd 100644
--- a/src/core/or/versions.c
+++ b/src/core/or/versions.c
@@ -448,6 +448,8 @@ memoize_protover_summary(protover_summary_flags_t *out,
   out->supports_v3_rendezvous_point =
     protocol_list_supports_protocol(protocols, PRT_HSREND,
                                     PROTOVER_HS_RENDEZVOUS_POINT_V3);
+    out->supports_padding =
+      protocol_list_supports_protocol(protocols, PRT_PADDING, 1);
 
   protover_summary_flags_t *new_cached = tor_memdup(out, sizeof(*out));
   cached = strmap_set(protover_summary_map, protocols, new_cached);
diff --git a/src/feature/hibernate/hibernate.c b/src/feature/hibernate/hibernate.c
index feeb3d92ef..f10a45f4ae 100644
--- a/src/feature/hibernate/hibernate.c
+++ b/src/feature/hibernate/hibernate.c
@@ -37,6 +37,7 @@ hibernating, phase 2:
 #include "core/or/connection_or.h"
 #include "feature/control/control.h"
 #include "lib/crypt_ops/crypto_rand.h"
+#include "lib/defs/time.h"
 #include "feature/hibernate/hibernate.h"
 #include "core/mainloop/mainloop.h"
 #include "feature/relay/router.h"
@@ -832,8 +833,6 @@ hibernate_soft_limit_reached(void)
   return get_accounting_bytes() >= soft_limit;
 }
 
-#define TOR_USEC_PER_SEC (1000000)
-
 /** Called when we get a SIGINT, or when bandwidth soft limit is
  * reached. Puts us into "loose hibernation": we don't accept new
  * connections, but we continue handling old ones. */
diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c
index e1063a0eac..b9c142787a 100644
--- a/src/feature/nodelist/networkstatus.c
+++ b/src/feature/nodelist/networkstatus.c
@@ -44,6 +44,7 @@
 #include "core/mainloop/netstatus.h"
 #include "core/or/channel.h"
 #include "core/or/channelpadding.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/circuitmux.h"
 #include "core/or/circuitmux_ewma.h"
 #include "core/or/circuitstats.h"
@@ -2116,6 +2117,7 @@ networkstatus_set_current_consensus(const char *consensus,
     circuit_build_times_new_consensus_params(
                                get_circuit_build_times_mutable(), c);
     channelpadding_new_consensus_params(c);
+    circpad_new_consensus_params(c);
   }
 
   /* Reset the failure count only if this consensus is actually valid. */
diff --git a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c
index d94e73f48f..33601fe1fa 100644
--- a/src/feature/nodelist/nodelist.c
+++ b/src/feature/nodelist/nodelist.c
@@ -1106,7 +1106,7 @@ node_ed25519_id_matches(const node_t *node, const ed25519_public_key_t *id)
 /** Dummy object that should be unreturnable.  Used to ensure that
  * node_get_protover_summary_flags() always returns non-NULL. */
 static const protover_summary_flags_t zero_protover_flags = {
-  0,0,0,0,0,0,0
+  0,0,0,0,0,0,0,0
 };
 
 /** Return the protover_summary_flags for a given node. */
@@ -2350,7 +2350,7 @@ compute_frac_paths_available(const networkstatus_t *consensus,
   const int authdir = authdir_mode_v3(options);
 
   count_usable_descriptors(num_present_out, num_usable_out,
-                           mid, consensus, now, NULL,
+                           mid, consensus, now, options->MiddleNodes,
                            USABLE_DESCRIPTOR_ALL);
   log_debug(LD_NET,
             "%s: %d present, %d usable",
diff --git a/src/feature/nodelist/routerlist.c b/src/feature/nodelist/routerlist.c
index b4d56459df..c8a658414b 100644
--- a/src/feature/nodelist/routerlist.c
+++ b/src/feature/nodelist/routerlist.c
@@ -3221,6 +3221,8 @@ refresh_all_country_info(void)
     routerset_refresh_countries(options->EntryNodes);
   if (options->ExitNodes)
     routerset_refresh_countries(options->ExitNodes);
+  if (options->MiddleNodes)
+    routerset_refresh_countries(options->MiddleNodes);
   if (options->ExcludeNodes)
     routerset_refresh_countries(options->ExcludeNodes);
   if (options->ExcludeExitNodes)
diff --git a/src/lib/crypt_ops/crypto_rand.c b/src/lib/crypt_ops/crypto_rand.c
index cffd0610f3..d148dfb3a8 100644
--- a/src/lib/crypt_ops/crypto_rand.c
+++ b/src/lib/crypt_ops/crypto_rand.c
@@ -529,6 +529,17 @@ crypto_rand_unmocked(char *to, size_t n)
 }
 
 /**
+ * Return a uniformly random uint32_t, filled byte-for-byte by crypto_rand().
+ */
+uint32_t
+crypto_rand_u32(void)
+{
+  uint32_t rand;
+  crypto_rand((void*)&rand, sizeof(rand));
+  return rand;
+}
+
+/**
  * Return a pseudorandom integer, chosen uniformly from the values
  * between 0 and <b>max</b>-1 inclusive.  <b>max</b> must be between 1 and
  * INT_MAX+1, inclusive.
diff --git a/src/lib/crypt_ops/crypto_rand.h b/src/lib/crypt_ops/crypto_rand.h
index 0c538d81ac..874fcd4d08 100644
--- a/src/lib/crypt_ops/crypto_rand.h
+++ b/src/lib/crypt_ops/crypto_rand.h
@@ -27,6 +27,7 @@ int crypto_rand_int(unsigned int max);
 int crypto_rand_int_range(unsigned int min, unsigned int max);
 uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max);
 time_t crypto_rand_time_range(time_t min, time_t max);
+uint32_t crypto_rand_u32(void);
 uint64_t crypto_rand_uint64(uint64_t max);
 double crypto_rand_double(void);
 struct tor_weak_rng_t;
diff --git a/src/lib/defs/include.am b/src/lib/defs/include.am
index 48ee7f29fc..6a7f9114ea 100644
--- a/src/lib/defs/include.am
+++ b/src/lib/defs/include.am
@@ -2,4 +2,5 @@
 noinst_HEADERS += 			\
 	src/lib/defs/dh_sizes.h 	\
 	src/lib/defs/digest_sizes.h	\
+	src/lib/defs/time.h      	\
 	src/lib/defs/x25519_sizes.h
diff --git a/src/lib/defs/time.h b/src/lib/defs/time.h
new file mode 100644
index 0000000000..762b23feab
--- /dev/null
+++ b/src/lib/defs/time.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2001, Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_TIME_DEFS_H
+#define TOR_TIME_DEFS_H
+
+/**
+ * \file time.h
+ *
+ * \brief Definitions for timing-related constants.
+ **/
+
+/** How many microseconds per second */
+#define TOR_USEC_PER_SEC (1000000)
+/** How many nanoseconds per microsecond */
+#define TOR_NSEC_PER_USEC (1000)
+/** How many nanoseconds per millisecond */
+#define TOR_NSEC_PER_MSEC (1000*1000)
+
+#endif
diff --git a/src/lib/math/.may_include b/src/lib/math/.may_include
index 1fd26864dc..f8bc264a5f 100644
--- a/src/lib/math/.may_include
+++ b/src/lib/math/.may_include
@@ -3,3 +3,5 @@ orconfig.h
 lib/cc/*.h
 lib/log/*.h
 lib/math/*.h
+lib/testsupport/*.h
+lib/crypt_ops/*.h
diff --git a/src/lib/math/fp.c b/src/lib/math/fp.c
index d5989db637..57082fa468 100644
--- a/src/lib/math/fp.c
+++ b/src/lib/math/fp.c
@@ -117,3 +117,28 @@ ENABLE_GCC_WARNING(double-promotion)
 ENABLE_GCC_WARNING(float-conversion)
 #endif
 }
+
+/** Return nonzero iff <b>x</b> is infinite: tor's wrapper around isinf(). */
+int
+tor_isinf(double x)
+{
+  /* As above: disable float-conversion/double-promotion warnings here */
+#if defined(MINGW_ANY) && GCC_VERSION >= 409
+#define PROBLEMATIC_FLOAT_CONVERSION_WARNING
+DISABLE_GCC_WARNING(float-conversion)
+#endif /* defined(MINGW_ANY) && GCC_VERSION >= 409 */
+#if defined(__clang__)
+#if __has_warning("-Wdouble-promotion")
+#define PROBLEMATIC_DOUBLE_PROMOTION_WARNING
+DISABLE_GCC_WARNING(double-promotion)
+#endif
+#endif /* defined(__clang__) */
+  return isinf(x);
+#ifdef PROBLEMATIC_DOUBLE_PROMOTION_WARNING
+ENABLE_GCC_WARNING(double-promotion)
+#endif
+#ifdef PROBLEMATIC_FLOAT_CONVERSION_WARNING
+ENABLE_GCC_WARNING(float-conversion)
+#endif
+}
+
diff --git a/src/lib/math/fp.h b/src/lib/math/fp.h
index e27b8f8d80..ddf3ed24d6 100644
--- a/src/lib/math/fp.h
+++ b/src/lib/math/fp.h
@@ -19,5 +19,6 @@ double tor_mathlog(double d) ATTR_CONST;
 long tor_lround(double d) ATTR_CONST;
 int64_t tor_llround(double d) ATTR_CONST;
 int64_t clamp_double_to_int64(double number);
+int tor_isinf(double x);
 
 #endif
diff --git a/src/lib/math/include.am b/src/lib/math/include.am
index b088b3f3cc..6d65ce90a7 100644
--- a/src/lib/math/include.am
+++ b/src/lib/math/include.am
@@ -7,7 +7,8 @@ endif
 
 src_lib_libtor_math_a_SOURCES =	\
 		src/lib/math/fp.c		\
-		src/lib/math/laplace.c
+		src/lib/math/laplace.c 	\
+		src/lib/math/prob_distr.c
 
 
 src_lib_libtor_math_testing_a_SOURCES = \
@@ -17,4 +18,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
 
 noinst_HEADERS +=				\
 		src/lib/math/fp.h		\
-		src/lib/math/laplace.h
+		src/lib/math/laplace.h  \
+		src/lib/math/prob_distr.h
diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c
new file mode 100644
index 0000000000..4263ba2074
--- /dev/null
+++ b/src/lib/math/prob_distr.c
@@ -0,0 +1,1717 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file prob_distr.c
+ *
+ * \brief
+ *  Implements various probability distributions.
+ *  Almost all code is courtesy of Riastradh.
+ *
+ * \details
+ * Here are some details that might help you understand this file:
+ *
+ * - Throughout this file, `eps' means the largest relative error of a
+ *   correctly rounded floating-point operation, which in binary64
+ *   floating-point arithmetic is 2^-53.  Here the relative error of a
+ *   true value x from a computed value y is |x - y|/|x|.  This
+ *   definition of epsilon is conventional for numerical analysts when
+ *   writing error analyses.  (If your libm doesn't provide correctly
+ *   rounded exp and log, their relative error is usually below 2*2^-53
+ *   and probably closer to 1.1*2^-53 instead.)
+ *
+ *   The C constant DBL_EPSILON is actually twice this, and should
+ *   perhaps rather be named ulp(1) -- that is, it is the distance from
+ *   1 to the next greater floating-point number, which is usually of
+ *   more interest to programmers and hardware engineers.
+ *
+ *   Since this file is concerned mainly with error bounds rather than
+ *   with low-level bit-hacking of floating-point numbers, we adopt the
+ *   numerical analysts' definition in the comments, though we do use
+ *   DBL_EPSILON in a handful of places where it is convenient to use
+ *   some function of eps = DBL_EPSILON/2 in a case analysis.
+ *
+ * - In various functions (e.g. sample_log_logistic()) we jump through hoops so
+ *   that we can use reals closer to 0 than closer to 1, since we achieve much
+ *   greater accuracy for floating point numbers near 0. In particular, we can
+ *   represent differences as small as 10^-300 for numbers near 0, but of no
+ *   less than 10^-16 for numbers near 1.
+ **/
+
+#define PROB_DISTR_PRIVATE
+
+#include "orconfig.h"
+
+#include "lib/math/prob_distr.h"
+
+#include "lib/crypt_ops/crypto_rand.h"
+#include "lib/cc/ctassert.h"
+
+#include <float.h>
+#include <math.h>
+#include <stddef.h>
+
+/** Always 0; only compiles if PTR's type matches a pointer to TYPE.FIELD */
+#define validate_container_of(PTR, TYPE, FIELD)                         \
+  (0 * sizeof((PTR) - &((TYPE *)(((char *)(PTR)) -                      \
+      offsetof(TYPE, FIELD)))->FIELD))
+#define validate_const_container_of(PTR, TYPE, FIELD)                   \
+  (0 * sizeof((PTR) - &((const TYPE *)(((const char *)(PTR)) -          \
+      offsetof(TYPE, FIELD)))->FIELD))
+/** Recover a TYPE * from PTR, which must point at that object's FIELD */
+#define container_of(PTR, TYPE, FIELD)                                  \
+  ((TYPE *)(((char *)(PTR)) - offsetof(TYPE, FIELD))                    \
+    + validate_container_of(PTR, TYPE, FIELD))
+/** Same as container_of(), for a const-qualified PTR and result */
+#define const_container_of(PTR, TYPE, FIELD)                            \
+  ((const TYPE *)(((const char *)(PTR)) - offsetof(TYPE, FIELD))        \
+    + validate_const_container_of(PTR, TYPE, FIELD))
+
+/**
+ * Return the number of one bits (population count) in the 32-bit word x.
+ */
+static unsigned
+bitcount32(uint32_t x)
+{
+
+  /* Replace each two-bit group with the count of one bits it held.  */
+  x -= (x >> 1) & UINT32_C(0x55555555);
+
+  /* Add adjacent pairs: each four-bit group now holds its own count.  */
+  x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333));
+
+  /* Add adjacent nibbles: each byte now holds its own count (0..8).  */
+  x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f);
+
+  /* Multiplying sums all four byte counts into the top byte; extract it.  */
+  return (x * UINT32_C(0x01010101)) >> 24;
+}
+
+/**
+ * Count leading zero bits in the 32-bit word x; yields 32 when x is 0.
+ */
+static unsigned
+clz32(uint32_t x)
+{
+
+  /* Copy the highest set bit into every lower bit position.  */
+  x |= x >> 1;
+  x |= x >> 2;
+  x |= x >> 4;
+  x |= x >> 8;
+  x |= x >> 16;
+
+  /* The ones now span the highest set bit down to bit 0; the rest lead.  */
+  return (32 - bitcount32(x));
+}
+
+/*
+ * Some lemmas that will be used throughout this file to prove various error
+ * bounds:
+ *
+ * Lemma 1.  If |d| <= 1/2, then 1/(1 + d) <= 2.
+ *
+ * Proof.  If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1.
+ * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2.  QED.
+ *
+ * Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b,
+ * then b = a*(1 + e) for |e| <= 2|d' - d|.
+ *
+ * Proof.  |a - b|/|a|
+ *             = |a - a*(1 + d)/(1 + d')|/|a|
+ *             = |1 - (1 + d)/(1 + d')|
+ *             = |(1 + d' - 1 - d)/(1 + d')|
+ *             = |(d' - d)/(1 + d')|
+ *            <= 2|d' - d|, by Lemma 1,
+ *
+ * QED.
+ *
+ * Lemma 3.  For |d|, |d'| < 1/4,
+ *
+ *     |log((1 + d)/(1 + d'))| <= 4|d - d'|.
+ *
+ * Proof.  Write
+ *
+ *     log((1 + d)/(1 + d'))
+ *      = log(1 + (1 + d)/(1 + d') - 1)
+ *      = log(1 + (1 + d - 1 - d')/(1 + d'))
+ *      = log(1 + (d - d')/(1 + d')).
+ *
+ * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor
+ * series of log(1 + x) converges absolutely for (d - d')/(1 + d'),
+ * and thus we have
+ *
+ *     |log(1 + (d - d')/(1 + d'))|
+ *      = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n|
+ *     <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n
+ *     <= \sum_{n=1}^\infty |2(d' - d)|^n/n
+ *     <= \sum_{n=1}^\infty |2(d' - d)|^n
+ *      = |2(d' - d)|/(1 - |2(d' - d)|)
+ *     <= 4|d' - d|,
+ *
+ * QED.
+ *
+ * Lemma 4.  If 1/e <= 1 + x <= e, then
+ *
+ *     log(1 + (1 + d) x) = (1 + d') log(1 + x)
+ *
+ * for |d'| < 8|d|.
+ *
+ * Proof.  Write
+ *
+ *     log(1 + (1 + d) x)
+ *     = log(1 + x + x*d)
+ *     = log((1 + x) (1 + x + x*d)/(1 + x))
+ *     = log(1 + x) + log((1 + x + x*d)/(1 + x))
+ *     = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)).
+ *
+ * The relative error is bounded by
+ *
+ *     |log((1 + x + x*d)/(1 + x))/log(1 + x)|
+ *     <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3,
+ *      = 4|x*d|/|log(1 + x)|
+ *      < 8|d|,
+ *
+ * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2.  QED.
+ */
+
+/**
+ * Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x).
+ * Maps a log-odds-space probability in [-\infty, +\infty] into a direct-space
+ * probability in [0,1].  Inverse of logit.
+ *
+ * Ill-conditioned for large x; the identity logistic(-x) = 1 -
+ * logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may
+ * help to rearrange a computation.
+ *
+ * This implementation gives relative error bounded by 7 eps.
+ */
+STATIC double
+logistic(double x)
+{
+  if (x <= log(DBL_EPSILON/2)) {
+    /*
+     * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case
+     * we will approximate the logistic() function with e^x because the
+     * relative error is less than eps. Here is a calculation of the
+     * relative error between the logistic() function and e^x and a proof
+     * that it's less than eps:
+     *
+     *     |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)|
+     *     <= |1 - 1/(1 + e^x)|*|1 + e^x|
+     *      = |e^x/(1 + e^x)|*|1 + e^x|
+     *      = |e^x|
+     *     <= eps.
+     */
+    return exp(x); /* return e^x */
+  } else if (x <= -log(DBL_EPSILON/2)) {
+    /*
+     * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 +
+     * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we
+     * also have 0 < e^{-x}/(1 + e^{-x}) < 1.  Thus, if exp has
+     * relative error d0, + has relative error d1, and /
+     * has relative error d2, then we get
+     *
+     *     (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)]
+     *     = (1 + d0)/[1 + e^{-x} + d0 e^{-x}
+     *                     + d1 + d1 e^{-x} + d0 d1 e^{-x}]
+     *     = (1 + d0)/[(1 + e^{-x})
+     *                 * (1 + d0 e^{-x}/(1 + e^{-x})
+     *                      + d1/(1 + e^{-x})
+     *                      + d0 d1 e^{-x}/(1 + e^{-x}))].
+     *     = (1 + d0)/[(1 + e^{-x})(1 + d')]
+     *     = [1/(1 + e^{-x})] (1 + d0)/(1 + d')
+     *
+     * where
+     *
+     *     d' = d0 e^{-x}/(1 + e^{-x})
+     *          + d1/(1 + e^{-x})
+     *          + d0 d1 e^{-x}/(1 + e^{-x}).
+     *
+     * By Lemma 2 this relative error is bounded by
+     *
+     *     2|d0 - d'|
+     *      = 2|d0 - d0 e^{-x}/(1 + e^{-x})
+     *             - d1/(1 + e^{-x})
+     *             - d0 d1 e^{-x}/(1 + e^{-x})|
+     *     <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})|
+     *             + 2|d1/(1 + e^{-x})|
+     *             + 2|d0 d1 e^{-x}/(1 + e^{-x})|
+     *     <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1|
+     *     <= 4|d0| + 2|d1| + 2|d0 d1|
+     *     <= 6 eps + 2 eps^2.
+     */
+    return 1/(1 + exp(-x));
+  } else {
+    /*
+     * e^{-x} <= eps, so the relative error of 1 from 1/(1
+     * + e^{-x}) is
+     *
+     *     |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})|
+     *      = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})|
+     *      = |e^{-x}|
+     *     <= eps.
+     *
+     * This computation avoids an intermediate overflow
+     * exception, although the effect on the result is
+     * harmless.
+     *
+     * XXX Should maybe raise inexact here.
+     */
+    return 1;
+  }
+}
+
+/**
+ * Compute the logit function: log(p/(1 - p)).  Defined on [0,1].  Maps
+ * a direct-space probability in [0,1] to a log-odds-space probability
+ * in [-\infty, +\infty].  Inverse of logistic.
+ *
+ * Ill-conditioned near 1/2 and 1; the identity logit(1 - p) =
+ * -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help
+ * to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)].
+ *
+ * This implementation gives relative error bounded by 10 eps.
+ */
+STATIC double
+logit(double p)
+{
+
+  /* logistic(-1) <= p <= logistic(+1) */
+  if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) {
+    /*
+     * For inputs near 1/2, we want to compute log1p(near
+     * 0) rather than log(near 1), so write this as:
+     *
+     * log(p/(1 - p)) = -log((1 - p)/p)
+     * = -log(1 + (1 - p)/p - 1)
+     * = -log(1 + (1 - p - p)/p)
+     * = -log(1 + (1 - 2p)/p).
+     *
+     * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point
+     * evaluation of 1 - 2p is exact; the only error arises
+     * from division and log1p.  First, note that if
+     * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies
+     * in the bounds of Lemma 4.
+     *
+     * If division has relative error d0 and log1p has
+     * relative error d1, the outcome is
+     *
+     *     -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p)
+     *     = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p)
+     *     = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p).
+     *
+     * where |d'| < 8|d0| by Lemma 4.  The relative error
+     * is then bounded by
+     *
+     *     |d1 + d' + d1 d'|
+     *     <= |d1| + 8|d0| + 8|d1 d0|
+     *     <= 9 eps + 8 eps^2.
+     */
+    return -log1p((1 - 2*p)/p);
+  } else {
+    /*
+     * For inputs near 0, although 1 - p may be rounded to
+     * 1, it doesn't matter much because the magnitude of
+     * the result is so much larger.  For inputs near 1, we
+     * can compute 1 - p exactly, although the precision on
+     * the input is limited so we won't ever get more than
+     * about 700 for the output.
+     *
+     * If - has relative error d0, / has relative error d1,
+     * and log has relative error d2, then
+     *
+     *     (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)])
+     *     = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))]
+     *     = log(p/(1 - p)) + d2 log(p/(1 - p))
+     *       + (1 + d2) log((1 + d0)/(1 + d1))
+     *     = log(p/(1 - p))*[1 + d2
+     *         + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))]
+     *
+     * Since 0 <= p < logistic(-1) or logistic(+1) < p <=
+     * 1, we have |log(p/(1 - p))| > 1.  Hence this error
+     * is bounded by
+     *
+     *     |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))|
+     *     <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))
+     *                      / log(p/(1 - p))|
+     *     <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))|
+     *     <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3,
+     *     <= |d2| + 4|d0 - d1 + d2 d0 - d2 d1|
+     *     <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d2 d1|
+     *     <= 9 eps + 8 eps^2.
+     */
+    return log(p/(1 - p));
+  }
+}
+
+/**
+ * Compute the logit function, translated in input by 1/2: logithalf(p)
+ * = logit(1/2 + p).  Defined on [-1/2, 1/2].  Inverse of logistichalf.
+ *
+ * Ill-conditioned near +/-1/2.  If |p0| > 1/2 - 1/(1 + e), it may be
+ * better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead.
+ * This implementation gives relative error bounded by 34 eps.
+ */
+STATIC double
+logithalf(double p0)
+{
+
+  if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) {
+    /*
+     * Central region, |p0| <= 1/2 - 1/(1 + e): evaluate
+     * through log1p, using the identity
+     *
+     * logit(1/2 + p0)
+     * = log((1/2 + p0)/(1 - (1/2 + p0)))
+     * = log((1/2 + p0)/(1/2 - p0))
+     * = log(1 + (1/2 + p0)/(1/2 - p0) - 1)
+     * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0))
+     * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0))
+     * = log(1 + 2 p0/(1/2 - p0))
+     *
+     * If the error of subtraction is d0, the error of
+     * division is d1, and the error of log1p is d2, then
+     * what we compute is
+     *
+     *  (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)])
+     *  = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0))
+     *  = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0))
+     *  = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)),
+     *
+     * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and
+     * |d''| < 8|d'| < 32 eps by Lemma 4 since
+     *
+     *  1/e <= 1 + 2*p0/(1/2 - p0) <= e
+     *
+     * when |p0| <= 1/2 - 1/(1 + e).  Hence the relative
+     * error is bounded by
+     *
+     *  |d2 + d'' + d2 d''|
+     *  <= |d2| + |d''| + |d2 d''|
+     *  <= eps + 32 eps + 32 eps^2
+     *   = 33 eps + 32 eps^2.
+     */
+    return log1p(2*p0/(0.5 - p0));
+  } else {
+    /*
+     * Outer region, |p0| > 1/2 - 1/(1 + e): evaluate the
+     * log of the odds ratio directly.
+     *
+     * We have a choice of computing logit(1/2 + p0) or
+     * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0).  It
+     * doesn't matter which way we do this: either way,
+     * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference
+     * are computed exactly.  So let's do the one that
+     * skips the final negation.
+     *
+     * The result is
+     *
+     *  (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)])
+     *  = (1 + d1) (1 + log((1 + d0)/(1 + d2))
+     *                  / log((1/2 + p0)/(1/2 - p0)))
+     *    * log((1/2 + p0)/(1/2 - p0))
+     *  = (1 + d') log((1/2 + p0)/(1/2 - p0))
+     *  = (1 + d') logit(1/2 + p0)
+     *
+     * where
+     *
+     *  d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0)
+     *       + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0).
+     *
+     * For |p0| > 1/2 - 1/(1 + e), |logit(1/2 + p0)| > 1.
+     * Provided |d0|, |d2| < 1/4, by Lemma 3 we have
+     *
+     *  |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|.
+     *
+     * Hence the relative error is bounded by
+     *
+     *  |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2|
+     *       <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2|
+     *       <= 9 eps + 8 eps^2.
+     */
+    return log((0.5 + p0)/(0.5 - p0));
+  }
+}
+
+/*
+ * The following random_uniform_01 is tailored for IEEE 754 binary64
+ * floating-point or smaller.  It can be adapted to larger
+ * floating-point formats like i387 80-bit or IEEE 754 binary128, but
+ * it may require sampling more bits.
+ *
+ * The compile-time assertions below pin down what is assumed about
+ * double: radix 2, a minimum exponent DBL_MIN_EXP no lower than -1021,
+ * and a significand of at most 53 bits.
+ */
+CTASSERT(FLT_RADIX == 2);
+CTASSERT(-DBL_MIN_EXP <= 1021);
+CTASSERT(DBL_MANT_DIG <= 53);
+
+/**
+ * Draw a floating-point number in [0, 1] with uniform distribution.
+ *
+ * Note that the probability of returning 0 is less than 2^-1074, so
+ * callers need not check for it.  However, callers that cannot handle
+ * rounding to 1 must deal with that, because it occurs with
+ * probability 2^-54, which is small but nonnegligible.
+ *
+ * Takes no arguments; all randomness comes from crypto_rand_u32().
+ * Returns 0 outright if the exponent search sees more than 1088
+ * consecutive zero bits.
+ */
+STATIC double
+random_uniform_01(void)
+{
+  uint32_t z, x, hi, lo;
+  double s;
+
+  /*
+   * Draw an exponent, geometrically distributed, but give up if
+   * we get a run of more than 1088 zeros, which really means the
+   * system is broken.
+   */
+  z = 0;
+  while ((x = crypto_rand_u32()) == 0) {
+    if (z >= 1088)
+      /* Your bit sampler is broken.  Go home.  */
+      return 0;
+    z += 32;
+  }
+  z += clz32(x);
+
+  /*
+   * Pick 32-bit halves of an odd normalized significand.
+   * Picking it odd breaks ties in the subsequent rounding, which
+   * occur only with measure zero in the uniform distribution on
+   * [0, 1].
+   */
+  hi = crypto_rand_u32() | UINT32_C(0x80000000);
+  lo = crypto_rand_u32() | UINT32_C(0x00000001);
+
+  /* Round to nearest scaled significand in [2^63, 2^64].  */
+  s = hi*(double)4294967296 + lo;
+
+  /*
+   * Rescale into [1/2, 1] and apply exponent in one swell foop.
+   *
+   * z is unsigned, so negating 64 + z directly would wrap around to
+   * a huge unsigned value and depend on an implementation-defined
+   * unsigned-to-int conversion to reach ldexp's int exponent.  Here
+   * z is at most 1088 + 31, so 64 + z fits in int: convert first,
+   * then negate.
+   */
+  return s * ldexp(1, -(int)(64 + z));
+}
+
+/*******************************************************************/
+
+/* Functions for specific probability distributions start here: */
+
+/*
+ * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty)
+ *
+ * This is the uniform distribution on [0,1] mapped into log-odds
+ * space, scaled by sigma and translated by mu.
+ *
+ * pdf(x) = e^{-(x - mu)/sigma} / (sigma (1 + e^{-(x - mu)/sigma})^2)
+ * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma)
+ * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma)
+ *       = logistic(-(x - mu)/sigma)
+ * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p)
+ * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p)
+ */
+
+/**
+ * Compute the CDF of the Logistic(mu, sigma) distribution: the
+ * logistic function.  Well-conditioned for negative inputs and small
+ * positive inputs; ill-conditioned for large positive inputs.
+ *
+ * x is standardized as (x - mu)/sigma and handed to logistic().
+ * sigma is used as a divisor without any check.
+ */
+STATIC double
+cdf_logistic(double x, double mu, double sigma)
+{
+  return logistic((x - mu)/sigma);
+}
+
+/**
+ * Compute the SF of the Logistic(mu, sigma) distribution: the logistic
+ * function reflected over the y axis.  Well-conditioned for positive
+ * inputs and small negative inputs; ill-conditioned for large negative
+ * inputs.
+ *
+ * x is standardized as (x - mu)/sigma, negated, and handed to
+ * logistic().  sigma is used as a divisor without any check.
+ */
+STATIC double
+sf_logistic(double x, double mu, double sigma)
+{
+  return logistic(-(x - mu)/sigma);
+}
+
+/**
+ * Compute the inverse of the CDF of the Logistic(mu, sigma)
+ * distribution: the logit function.  Well-conditioned near 0;
+ * ill-conditioned near 1/2 and 1.
+ *
+ * Returns mu + sigma*logit(p); p is passed to logit() unchecked.
+ */
+STATIC double
+icdf_logistic(double p, double mu, double sigma)
+{
+  return mu + sigma*logit(p);
+}
+
+/**
+ * Compute the inverse of the SF of the Logistic(mu, sigma)
+ * distribution: the -logit function.  Well-conditioned near 0;
+ * ill-conditioned near 1/2 and 1.
+ *
+ * Returns mu - sigma*logit(p); p is passed to logit() unchecked.
+ */
+STATIC double
+isf_logistic(double p, double mu, double sigma)
+{
+  return mu - sigma*logit(p);
+}
+
+/*
+ * LogLogistic(alpha, beta) distribution, supported on (0, +\infty).
+ *
+ * This is the uniform distribution on [0,1] mapped into odds space,
+ * scaled by positive alpha and shaped by positive beta.
+ *
+ * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample.
+ * (Name arises because the pdf has LogLogistic(x; alpha, beta) =
+ * Logistic(log x; log alpha, 1/beta) and mathematicians got their
+ * covariance contravariant.)
+ *
+ * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2
+ *        = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} /
+ *              (1 + (x/e^mu)^{1/sigma})^2
+ * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma})
+ *        = 1/(1 + (e^{log x}/e^mu)^{-1/sigma})
+ *        = 1/(1 + (e^{log x - mu})^{-1/sigma})
+ *        = 1/(1 + e^{-(log x - mu)/sigma})
+ *        = logistic((log x - mu)/sigma)
+ *        = logistic((log x - log alpha)/(1/beta))
+ * sf(x) = 1 - 1/(1 + (x/alpha)^-beta)
+ *       = (x/alpha)^-beta/(1 + (x/alpha)^-beta)
+ *       = 1/((x/alpha)^beta + 1)
+ *       = 1/(1 + (x/alpha)^beta)
+ * icdf(p) = alpha (p/(1 - p))^{1/beta}
+ *         = alpha e^{logit(p)/beta}
+ *         = e^{mu + sigma logit(p)}
+ * isf(p) = alpha ((1 - p)/p)^{1/beta}
+ *        = alpha e^{-logit(p)/beta}
+ *        = e^{mu - sigma logit(p)}
+ */
+
+/**
+ * Compute the CDF of the LogLogistic(alpha, beta) distribution.
+ * Well-conditioned for all x and alpha, and the condition number
+ *
+ *      -beta/[1 + (x/alpha)^{-beta}]
+ *
+ * grows linearly with beta.
+ *
+ * Loosely, the relative error of this implementation is bounded by
+ *
+ *      4 eps + 2 eps^2 + O(beta eps),
+ *
+ * so don't bother trying this for beta anywhere near as large as
+ * 1/eps, around which point it levels off at 1.
+ */
+STATIC double
+cdf_log_logistic(double x, double alpha, double beta)
+{
+  /*
+   * The return statement evaluates 1/(1 + (x/alpha)^{-beta}) as
+   * four operations: the quotient x/alpha, pow, the addition,
+   * and the final quotient.
+   *
+   * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and
+   * d3, of the final quotient.  The exponentiation gives
+   *
+   *    ((1 + d0) x/alpha)^{-beta}
+   *    = (x/alpha)^{-beta} (1 + d0)^{-beta}
+   *    = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1)
+   *    = (x/alpha)^{-beta} (1 + d')
+   *
+   * where d' = (1 + d0)^{-beta} - 1.  If y = (x/alpha)^{-beta},
+   * the denominator is
+   *
+   *    (1 + d2) (1 + (1 + d1) (1 + d') y)
+   *    = (1 + d2) (1 + y + (d1 + d' + d1 d') y)
+   *    = 1 + y + (1 + d2) (d1 + d' + d1 d') y
+   *    = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y))
+   *    = (1 + y) (1 + d''),
+   *
+   * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y).  The
+   * final result is
+   *
+   *    (1 + d3) / [(1 + d2) (1 + d'') (1 + y)]
+   *    = (1 + d''') / (1 + y)
+   *
+   * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2
+   * (which may not be the case for very large beta).  This
+   * relative error is therefore bounded by
+   *
+   *    |d'''|
+   *    <= 2|d3 - d''|
+   *    <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)|
+   *    <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')|
+   *     = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'|
+   *    <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'|
+   *       + 2|d2 d1 d'|
+   *    <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|.
+   *
+   * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps,
+   * until it levels off at 1.
+   */
+  return 1/(1 + pow(x/alpha, -beta));
+}
+
+/**
+ * Compute the SF of the LogLogistic(alpha, beta) distribution.
+ * Well-conditioned for all x and alpha, and the condition number
+ *
+ *      beta/[1 + (x/alpha)^beta]
+ *
+ * grows linearly with beta.
+ *
+ * Loosely, the relative error of this implementation is bounded by
+ *
+ *      4 eps + 2 eps^2 + O(beta eps)
+ *
+ * so don't bother trying this for beta anywhere near as large as
+ * 1/eps, beyond which point it grows unbounded.
+ *
+ * Returns 1/(1 + (x/alpha)^beta); alpha is used as a divisor without
+ * any check.
+ */
+STATIC double
+sf_log_logistic(double x, double alpha, double beta)
+{
+  /*
+   * The error analysis here is essentially the same as in
+   * cdf_log_logistic, except that rather than levelling off at
+   * 1, |(1 + d0)^beta - 1| grows unbounded.  The only difference
+   * in the code is the sign of the exponent passed to pow.
+   */
+  return 1/(1 + pow(x/alpha, beta));
+}
+
+/**
+ * Compute the inverse of the CDF of the LogLogistic(alpha, beta)
+ * distribution.  Ill-conditioned for p near 1 and beta near 0 with
+ * condition number 1/[beta (1 - p)].
+ *
+ * Returns alpha*(p/(1 - p))^{1/beta}, computed from the odds ratio
+ * directly rather than through logit(); p and beta are not checked.
+ */
+STATIC double
+icdf_log_logistic(double p, double alpha, double beta)
+{
+  return alpha*pow(p/(1 - p), 1/beta);
+}
+
+/**
+ * Compute the inverse of the SF of the LogLogistic(alpha, beta)
+ * distribution.  Ill-conditioned for p near 1 and for beta near 0,
+ * with condition number -1/[beta (1 - p)].
+ *
+ * Returns alpha*((1 - p)/p)^{1/beta}, computed from the odds ratio
+ * directly rather than through logit(); p and beta are not checked.
+ */
+STATIC double
+isf_log_logistic(double p, double alpha, double beta)
+{
+  return alpha*pow((1 - p)/p, 1/beta);
+}
+
+/*
+ * Weibull(lambda, k) distribution, supported on (0, +\infty).
+ *
+ * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k}
+ * cdf(x) = 1 - e^{-(x/lambda)^k}
+ * icdf(p) = lambda * (-log (1 - p))^{1/k}
+ * sf(x) = e^{-(x/lambda)^k}
+ * isf(p) = lambda * (-log p)^{1/k}
+ */
+
+/**
+ * Compute the CDF of the Weibull(lambda, k) distribution.
+ * Well-conditioned for small x and k, and for large lambda --
+ * condition number
+ *
+ *      -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1]
+ *
+ * grows linearly with k, x^k, and lambda^{-k}.
+ *
+ * Evaluates 1 - e^{-(x/lambda)^k} as -expm1(-(x/lambda)^k), so the
+ * subtraction from 1 is never performed explicitly.
+ */
+STATIC double
+cdf_weibull(double x, double lambda, double k)
+{
+  return -expm1(-pow(x/lambda, k));
+}
+
+/**
+ * Compute the SF of the Weibull(lambda, k) distribution.
+ * Well-conditioned for small x and k, and for large lambda --
+ * condition number
+ *
+ *      -k (x/lambda)^k
+ *
+ * grows linearly with k, x^k, and lambda^{-k}.
+ *
+ * Returns e^{-(x/lambda)^k}; lambda is used as a divisor without any
+ * check.
+ */
+STATIC double
+sf_weibull(double x, double lambda, double k)
+{
+  return exp(-pow(x/lambda, k));
+}
+
+/**
+ * Compute the inverse of the CDF of the Weibull(lambda, k)
+ * distribution.  Ill-conditioned for p near 1, and for k near 0;
+ * condition number is
+ *
+ *      (p/(1 - p))/(k log(1 - p)).
+ *
+ * Returns lambda*(-log(1 - p))^{1/k}, with log(1 - p) evaluated as
+ * log1p(-p).
+ */
+STATIC double
+icdf_weibull(double p, double lambda, double k)
+{
+  return lambda*pow(-log1p(-p), 1/k);
+}
+
+/**
+ * Compute the inverse of the SF of the Weibull(lambda, k)
+ * distribution.  Ill-conditioned for p near 0, and for k near 0;
+ * condition number is
+ *
+ *      1/(k log(p)).
+ *
+ * Returns lambda*(-log p)^{1/k}; p and k are not checked.
+ */
+STATIC double
+isf_weibull(double p, double lambda, double k)
+{
+  return lambda*pow(-log(p), 1/k);
+}
+
+/*
+ * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for
+ * nonnegative xi, or (mu, mu - sigma/xi) for negative xi.
+ *
+ * Samples:
+ * = mu - sigma log U, if xi = 0;
+ * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0,
+ * where U is uniform on (0,1].
+ * = mu + sigma (e^{xi X} - 1)/xi,
+ * where X has standard exponential distribution.
+ *
+ * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)}
+ * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi}
+ *        = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi}
+ *        --> 1 - e^{-(x - mu)/sigma}  as  xi --> 0
+ * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi}
+ *       --> e^{-(x - mu)/sigma}  as  xi --> 0
+ * icdf(p) = mu + sigma*((1 - p)^{-xi} - 1)/xi
+ *         = mu + sigma*expm1(-xi log1p(-p))/xi
+ *         --> mu - sigma*log1p(-p)  as  xi --> 0
+ * isf(p) = mu + sigma*(p^{-xi} - 1)/xi
+ *        = mu + sigma*expm1(-xi log p)/xi
+ *        --> mu - sigma*log p  as  xi --> 0
+ */
+
+/**
+ * Compute the CDF of the GeneralizedPareto(mu, sigma, xi)
+ * distribution.  Well-conditioned everywhere.  For standard
+ * distribution (mu=0, sigma=1), condition number
+ *
+ *      (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1)
+ *
+ * is bounded by 1, attained only at x = 0.
+ */
+STATIC double
+cdf_genpareto(double x, double mu, double sigma, double xi)
+{
+  double x_0 = (x - mu)/sigma;
+
+  /*
+   * x_0 is the standardized input.  For xi close enough to zero
+   * we return the exponential limit 1 - e^{-x_0} instead of the
+   * general formula, justified as follows.
+   *
+   * log(1 + xi x_0)/xi
+   * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n
+   * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n)
+   * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n
+   * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n
+   * = x_0 (1 - d),
+   *
+   * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n.  If |xi| <
+   * eps/4|x_0|, then
+   *
+   * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}
+   *      = \sum_{n=1}^\infty (eps/4)^n
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2
+   *
+   * for any 0 < eps < 2.  Thus, the relative error of x_0 from
+   * log(1 + xi x_0)/xi is bounded by eps.
+   *
+   * The test below uses the fixed constant 1e-17 for eps/4 and
+   * divides by x_0 rather than |x_0|, so for negative x_0 the
+   * right-hand side is negative and the general branch is
+   * always taken.
+   */
+  if (fabs(xi) < 1e-17/x_0)
+    return -expm1(-x_0);
+  else
+    return -expm1(-log1p(xi*x_0)/xi);
+}
+
+/**
+ * Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution.
+ * For standard distribution (mu=0, sigma=1), ill-conditioned for xi
+ * near 0; condition number
+ *
+ *      -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi}
+ *      = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi}
+ *      = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi}
+ *      = -x/(1 + x xi)
+ *
+ * is bounded by 1/xi.
+ *
+ * The input is standardized to x_0 = (x - mu)/sigma.  When |xi| is
+ * below 1e-17/x_0 the exponential limit e^{-x_0} is returned;
+ * otherwise (1 + xi x_0)^{-1/xi} is evaluated as
+ * exp(-log1p(xi x_0)/xi).  This is the same cutoff cdf_genpareto
+ * uses.
+ */
+STATIC double
+sf_genpareto(double x, double mu, double sigma, double xi)
+{
+  double x_0 = (x - mu)/sigma;
+
+  if (fabs(xi) < 1e-17/x_0)
+    return exp(-x_0);
+  else
+    return exp(-log1p(xi*x_0)/xi);
+}
+
+/**
+ * Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma,
+ * xi) distribution.  Ill-conditioned for p near 1; condition number is
+ *
+ *      xi (p/(1 - p))/(1 - (1 - p)^xi)
+ */
+STATIC double
+icdf_genpareto(double p, double mu, double sigma, double xi)
+{
+  /*
+   * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi
+   * for xi near zero (note f(xi) --> -log U as xi --> 0), write
+   * the absolutely convergent Taylor expansion
+   *
+   * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n!)
+   *       = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n!
+   *       = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n!
+   *       = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n!
+   *       = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!).
+   *
+   * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!.  What do we
+   * lose if we discard it and use -log U as an approximation to
+   * f(xi)?  If |xi| < eps/-4log U, then
+   *
+   * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n!
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
+   *     <= \sum_{n=1}^\infty (eps/4)^n
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2,
+   *
+   * for any 0 < eps < 2.  Hence, as long as |xi| < eps/-2log U,
+   * f(xi) = -log U (1 + d) for |d| <= eps/2.  |d| is the
+   * relative error of f(xi) from -log U; from this bound, the
+   * relative error of -log U from f(xi) is at most (eps/2)/(1 -
+   * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 <
+   * eps < 1.  Since -log U < 1000 for all U in (0, 1] in
+   * binary64 floating-point, we can safely cut xi off at 1e-20 <
+   * eps/4000 and attain <1ulp error from series truncation.
+   *
+   * In the code below U stands for 1 - p, so log U is evaluated
+   * as log1p(-p).
+   */
+  if (fabs(xi) <= 1e-20)
+    return mu - sigma*log1p(-p);
+  else
+    return mu + sigma*expm1(-xi*log1p(-p))/xi;
+}
+
+/**
+ * Compute the inverse of the SF of the GeneralizedPareto(mu, sigma,
+ * xi) distribution.  Ill-conditioned for p near 1; condition number is
+ *
+ *      -xi/(1 - p^{-xi})
+ *
+ * For |xi| <= 1e-20 this returns the xi --> 0 limit mu - sigma*log(p);
+ * otherwise it returns mu + sigma*(p^{-xi} - 1)/xi, evaluated with
+ * expm1.  This is the same cutoff icdf_genpareto uses.
+ */
+STATIC double
+isf_genpareto(double p, double mu, double sigma, double xi)
+{
+  if (fabs(xi) <= 1e-20)
+    return mu - sigma*log(p);
+  else
+    return mu + sigma*expm1(-xi*log(p))/xi;
+}
+
+/*******************************************************************/
+
+/**
+ * Deterministic samplers, parametrized by uniform integer and (0,1]
+ * samples.  No guarantees are made about _which_ mapping from the
+ * integer and (0,1] samples these use; all that is guaranteed is the
+ * distribution of the outputs conditioned on a uniform distribution on
+ * the inputs.  The automatic tests in test_prob_distr.c double-check
+ * the particular mappings we use.
+ *
+ * Beware: Unlike random_uniform_01(), these are not guaranteed to be
+ * supported on all possible outputs.  See Ilya Mironov, `On the
+ * Significance of the Least Significant Bits for Differential
+ * Privacy', for an example of what can go wrong if you try to use
+ * these to conceal information from an adversary but you expose the
+ * specific full-precision floating-point values.
+ *
+ * Note: None of these samplers use rejection sampling; they are all
+ * essentially inverse-CDF transforms with tweaks.  If you were to add,
+ * say, a Gamma sampler with the Marsaglia-Tsang method, you would have
+ * to parametrize it by a potentially infinite stream of uniform (and
+ * perhaps normal) samples rather than a fixed number, which doesn't
+ * make for quite as nice automatic testing as for these.
+ */
+
+/**
+ * Deterministically sample from the interval [a, b], indexed by a
+ * uniform random floating-point number p0 in (0, 1].
+ *
+ * Note that even if p0 is nonzero, the result may be equal to a, if
+ * ulp(a)/2 is nonnegligible, e.g. if a = 1.  For maximum resolution,
+ * arrange |a| <= |b|.
+ */
+STATIC double
+sample_uniform_interval(double p0, double a, double b)
+{
+  /*
+   * XXX Prove that the distribution is, in fact, uniform on
+   * [a,b], particularly around p0 = 1, or at least has very
+   * small deviation from uniform, quantified appropriately
+   * (e.g., like in Monahan 1984, or by KL divergence).  It
+   * almost certainly does but it would be nice to quantify the
+   * error.
+   */
+  if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) {
+    /*
+     * When a and b lie on opposite sides of zero (or one
+     * of them is zero), (1 - t) a + t b is monotonic, since
+     * for a <= b it is a sum of nondecreasing functions of
+     * t, and for b <= a, of nonincreasing functions of t.
+     * Further, clearly at 0 and 1 it attains a and b,
+     * respectively.  Hence it is bounded within [a, b].
+     */
+    return (1 - p0)*a + p0*b;
+  } else {
+    /*
+     * a + (b - a) t is monotonic -- it is obviously a
+     * nondecreasing function of t for a <= b.  Further, it
+     * attains a at 0, and while it may overshoot b at 1,
+     * we have a
+     *
+     * Theorem.  If 0 <= t < 1, then the floating-point
+     * evaluation of a + (b - a) t is bounded in [a, b].
+     *
+     * Lemma 1.  If 0 <= t < 1 is a floating-point number,
+     * then for any normal floating-point number x except
+     * the smallest in magnitude, |round(x*t)| < |x|.
+     *
+     * Proof.  WLOG, assume x >= 0.  Since the rounding
+     * function and t |---> x*t are nondecreasing, their
+     * composition t |---> round(x*t) is also
+     * nondecreasing, so it suffices to consider the
+     * largest floating-point number below 1, in particular
+     * t = 1 - ulp(1)/2.
+     *
+     * Case I: If x is a power of two, then the next
+     * floating-point number below x is x - ulp(x)/2 = x -
+     * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t
+     * is a floating-point number, multiplication is exact,
+     * and thus round(x*t) = x*t < x.
+     *
+     * Case II: If x is not a power of two, then the
+     * greatest lower bound of real numbers rounded to x is
+     * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2,
+     * where T(x) is the largest power of two below x.
+     * Anything below this bound is rounded to a
+     * floating-point number smaller than x, and x*t = x*(1
+     * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2
+     * since T(x) < x, so round(x*t) < x*t < x.  QED.
+     *
+     * Lemma 2.  If x and y are subnormal, then round(x +
+     * y) = x + y.
+     *
+     * Proof.  It is a matter of adding the significands,
+     * since if we treat subnormals as having an implicit
+     * zero bit before the `binary' point, their exponents
+     * are all the same.  There is at most one carry/borrow
+     * bit, which can always be accommodated either in a
+     * subnormal, or, at largest, in the implicit one bit
+     * of a normal.
+     *
+     * Lemma 3.  Let x and y be floating-point numbers.  If
+     * round(x - y) is subnormal or zero, then it is equal
+     * to x - y.
+     *
+     * Proof.  Case I (equal): round(x - y) = 0 iff x = y;
+     * hence if round(x - y) = 0, then round(x - y) = 0 = x
+     * - y.
+     *
+     * Case II (subnormal/subnormal): If x and y are both
+     * subnormal, this follows directly from Lemma 2.
+     *
+     * Case IIIa (normal/subnormal): If x is normal and y
+     * is subnormal, then x and y must share sign, or else
+     * x - y would be larger than x and thus rounded to
+     * normal.  If s is the smallest normal positive
+     * floating-point number, |x| < 2s since by
+     * construction 2s - |y| is normal for all subnormal y.
+     * This means that x and y must have the same exponent,
+     * so the difference is the difference of significands,
+     * which is exact.
+     *
+     * Case IIIb (subnormal/normal): Same as case IIIa for
+     * -(y - x).
+     *
+     * Case IV (normal/normal): If x and y are both normal,
+     * then they must share sign, or else x - y would be
+     * larger than x and thus rounded to normal.  Note that
+     * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <=
+     * -|x| but -|x| is normal like x.  Also, |x|/2 < |y|:
+     * if |x|/2 is subnormal, it must hold because y is
+     * normal; if |x|/2 is normal, then |x|/2 >= s, so
+     * since |x| - |y| < s,
+     *
+     *  |x|/2 = |x| - |x|/2 <= |x| - s <= |y|;
+     *
+     * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz
+     * lemma, round(x - y) = x - y.  QED.
+     *
+     * Proof of theorem.  WLOG, assume 0 <= a <= b.  Since
+     * round(a + round(round(b - a)*t)) is nondecreasing in
+     * t and attains a at 0, the lower end of the bound is
+     * trivial; we must show the upper end of the bound
+     * strictly.  It suffices to show this for the largest
+     * floating-point number below 1, namely 1 - ulp(1)/2.
+     *
+     * Case I: round(b - a) is normal.  Then it is at most
+     * the smallest floating-point number above b - a.  By
+     * Lemma 1, round(round(b - a)*t) < round(b - a).
+     * Since the inequality is strict, and since
+     * round(round(b - a)*t) is a floating-point number
+     * below round(b - a), and since there are no
+     * floating-point numbers between b - a and round(b -
+     * a), we must have round(round(b - a)*t) < b - a.
+     * Then since y |---> round(a + y) is nondecreasing, we
+     * must have
+     *
+     *  round(a + round(round(b - a)*t))
+     *  <= round(a + (b - a))
+     *   = round(b) = b.
+     *
+     * Case II: round(b - a) is subnormal.  In this case,
+     * Lemma 1 falls apart -- we are not guaranteed the
+     * strict inequality.  However, by Lemma 3, the
+     * difference is exact: round(b - a) = b - a.  Thus,
+     *
+     *  round(a + round(round(b - a)*t))
+     *  <= round(a + round((b - a)*t))
+     *  <= round(a + (b - a))
+     *   = round(b)
+     *   = b,
+     *
+     * QED.
+     */
+
+    /*
+     * p0 is restricted to [0,1], but we use >= to silence
+     * -Wfloat-equal.  The theorem above only covers t < 1, so
+     * p0 = 1 is answered with b directly rather than by
+     * evaluating a + (b - a)*p0.
+     */
+    if (p0 >= 1)
+      return b;
+    return a + (b - a)*p0;
+  }
+}
+
+/**
+ * Deterministically sample from the standard logistic distribution,
+ * indexed by a uniform random 32-bit integer s and uniform random
+ * floating-point numbers t and p0 in (0, 1].
+ */
+STATIC double
+sample_logistic(uint32_t s, double t, double p0)
+{
+  double sign = (s & 1) ? -1 : +1;
+  double r;
+
+  /*
+   * We carve up the interval (0, 1) into subregions to compute
+   * the inverse CDF precisely:
+   *
+   * A = (0, 1/(1 + e)] ---> (-\infty, -1]
+   * B = [1/(1 + e), 1/2] ---> [-1, 0]
+   * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1]
+   * D = [1 - 1/(1 + e), 1) ---> [1, +\infty)
+   *
+   * Cases D and C are mirror images of cases A and B,
+   * respectively, so we choose between them by the sign chosen
+   * by a fair coin toss.  We choose between cases A and B by a
+   * coin toss weighted by
+   *
+   *    2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2):
+   *
+   * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)]
+   * sample p; if it comes up tails, scale p0 into a uniform (0,
+   * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p =
+   * 1/2 - p0.
+   *
+   * The weighted coin is t, and the fair coin is the low bit of
+   * s, already folded into sign above.
+   */
+  if (t <= 2/(1 + exp(1))) {
+    /* p uniform in (0, 1/(1 + e)], represented by p0.  */
+    p0 /= 1 + exp(1);
+    r = logit(p0);
+  } else {
+    /*
+     * p uniform in [1/(1 + e), 1/2), actually represented
+     * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so
+     * that p = 1/2 - p0.
+     *
+     * NOTE(review): logithalf is documented as logit(1/2 +
+     * p0), so for positive p0 the r computed here appears
+     * to be nonnegative (region C) rather than in region B
+     * as described.  The fair sign flip below mirrors it
+     * either way -- confirm that this is intended.
+     */
+    p0 *= 0.5 - 1/(1 + exp(1));
+    r = logithalf(p0);
+  }
+
+  /*
+   * We have chosen from the negative half of the standard
+   * logistic distribution, which is symmetric with the positive
+   * half.  Now use the sign to choose uniformly between them.
+   */
+  return sign*r;
+}
+
+/**
+ * Deterministically sample from the logistic distribution scaled by
+ * sigma and translated by mu.
+ *
+ * s, t, and p0 are forwarded unchanged to sample_logistic(); the
+ * standard sample it returns is mapped to mu + sigma*sample.
+ */
+static double
+sample_logistic_locscale(uint32_t s, double t, double p0, double mu,
+    double sigma)
+{
+
+  return mu + sigma*sample_logistic(s, t, p0);
+}
+
+/**
+ * Deterministic sampler for the standard log-logistic distribution.
+ * The low bit of the uniform random 32-bit integer s selects which
+ * half of (0, 1) is used, and the uniform random floating-point number
+ * p0 in (0, 1] selects the point within that half.
+ */
+STATIC double
+sample_log_logistic(uint32_t s, double p0)
+{
+  /*
+   * Split (0, 1) at 1/2, where the condition numbers of the icdf
+   * and the isf coincide, and scale p0 down into (0, 1/2].
+   */
+  const double half = 0.5*p0;
+
+  /*
+   * Odd s:  p = 1 - half in [1/2, 1), odds are (1 - half)/half.
+   * Even s: p = half in (0, 1/2], odds are half/(1 - half).
+   */
+  return (s & 1) != 0 ? (1 - half)/half : half/(1 - half);
+}
+
+/**
+ * Deterministically sample from the log-logistic distribution with
+ * scale alpha and shape beta.
+ *
+ * s and p0 are forwarded unchanged to sample_log_logistic(); the
+ * standard sample x it returns is mapped to alpha*x^{1/beta}.
+ */
+static double
+sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha,
+    double beta)
+{
+  double x = sample_log_logistic(s, p0);
+
+  return alpha*pow(x, 1/beta);
+}
+
+/**
+ * Deterministic sampler for the standard exponential distribution.
+ * The low bit of the uniform random 32-bit integer s selects which
+ * half of (0, 1) is used, and the uniform random floating-point number
+ * p0 in (0, 1] selects the point within that half.
+ */
+static double
+sample_exponential(uint32_t s, double p0)
+{
+  /*
+   * log(p) is the accurate form for p near 0 and log1p(-p) for p
+   * near 1, so a fair coin toss on s splits the interval into
+   * (0, 1/2] and [1/2, 1), with p0 scaled down into (0, 1/2].
+   */
+  const double half = 0.5*p0;
+
+  if ((s & 1) != 0) {
+    /* p = 1 - half in [1/2, 1) */
+    return -log1p(-half);
+  }
+
+  /* p = half in (0, 1/2] */
+  return -log(half);
+}
+
+/**
+ * Deterministically sample from a Weibull distribution with scale
+ * lambda and shape k -- just an exponential with a shape parameter in
+ * addition to a scale parameter.  (Yes, lambda really is the scale,
+ * _not_ the rate.)
+ *
+ * s and p0 are forwarded unchanged to sample_exponential(); the
+ * standard exponential sample x it returns is mapped to
+ * lambda*x^{1/k}.
+ */
+STATIC double
+sample_weibull(uint32_t s, double p0, double lambda, double k)
+{
+
+  return lambda*pow(sample_exponential(s, p0), 1/k);
+}
+
+/**
+ * Deterministically sample from the generalized Pareto distribution
+ * with shape xi, indexed by a uniform random 32-bit integer s and a
+ * uniform random floating-point number p0 in (0, 1].
+ */
+STATIC double
+sample_genpareto(uint32_t s, double p0, double xi)
+{
+  double x = sample_exponential(s, p0);
+
+  /*
+   * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the
+   * absolutely convergent Taylor series
+   *
+   * f(xi) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!)
+   *       = x + (1/xi) \sum_{n=2}^\infty (xi x)^n/n!
+   *       = x + \sum_{n=2}^\infty xi^{n-1} x^n/n!
+   *       = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n!
+   *       = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!).
+   *
+   * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error
+   * of f(xi) from x.  If |xi| < eps/4x, then
+   *
+   * |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n!
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
+   *     <= \sum_{n=1}^\infty (eps/4)^n
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2,
+   *
+   * for any 0 < eps < 2.  Hence, as long as |xi| < eps/2x, f(xi)
+   * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'|
+   * <= eps.  What bound should we use for x?
+   *
+   * - If x is exponentially distributed, x > 200 with
+   *   probability below e^{-200} << 2^{-256}, i.e. never.
+   *
+   * - If x is computed by -log(U) for U in (0, 1], x is
+   *   guaranteed to be below 1000 in IEEE 754 binary64
+   *   floating-point.
+   *
+   * We can safely cut xi off at 1e-20 < eps/4000 and attain an
+   * error bounded by 0.5 ulp for this expression.
+   *
+   * Below the cutoff the exponential sample x is returned as is;
+   * otherwise f(xi) is evaluated as expm1(xi*x)/xi.
+   */
+  return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi);
+}
+
+/**
+ * Deterministically sample from a generalized Pareto distribution with
+ * shape xi, scaled by sigma and translated by mu.
+ *
+ * s, p0, and xi are forwarded unchanged to sample_genpareto(); the
+ * sample it returns is mapped to mu + sigma*sample.
+ */
+static double
+sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma,
+    double xi)
+{
+
+  return mu + sigma*sample_genpareto(s, p0, xi);
+}
+
+/**
+ * Deterministically sample from the geometric distribution with
+ * per-trial success probability p.
+ *
+ * s and p0 index a standard exponential sample x; the result is
+ * ceil(-x/log1p(-p)), except that p >= 1 always yields 1.
+ *
+ * XXX Quantify the error (KL divergence?) of this
+ * ceiling-of-exponential sampler from a true geometric distribution,
+ * which we could get by rejection sampling.  Relevant papers:
+ *
+ *      John F. Monahan, `Accuracy in Random Number Generation',
+ *      Mathematics of Computation 45(172), October 1984, pp. 559--568.
+*https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf
+ *
+ *      Karl Bringmann and Tobias Friedrich, `Exact and Efficient
+ *      Generation of Geometric Random Variates and Random Graphs', in
+ *      Proceedings of the 40th International Colloquium on Automata,
+ *      Languages, and Programming -- ICALP 2013, Springer LNCS 7965,
+ *      pp.267--278.
+ *      https://doi.org/10.1007/978-3-642-39206-1_23
+ *      https://people.mpi-inf.mpg.de/~kbringma/paper/2013ICALP-1.pdf
+ */
+static double
+sample_geometric(uint32_t s, double p0, double p)
+{
+  double x = sample_exponential(s, p0);
+
+  /* This is actually a check against 1, but we do >= so that the compiler
+     does not raise a -Wfloat-equal.  Returning early here also keeps
+     log1p(-p) below from being evaluated at p = 1. */
+  if (p >= 1)
+    return 1;
+
+  return ceil(-x/log1p(-p));
+}
+
+/*******************************************************************/
+
+/** Public API for probability distributions:
+ *
+ *  For each probability distribution, we define its public functions
+ *  (sample/cdf/sf/icdf/isf) as members of its dist_ops structure.
+ */
+
+/** Return the name recorded in the operations table of <b>dist</b>. */
+const char *
+dist_name(const struct dist *dist)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->name;
+}
+
+/** Draw a sample from <b>dist</b> using its own sample operation. */
+double
+dist_sample(const struct dist *dist)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->sample(dist);
+}
+
+/** Evaluate the cdf operation of <b>dist</b> at <b>x</b>. */
+double
+dist_cdf(const struct dist *dist, double x)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->cdf(dist, x);
+}
+
+/** Evaluate the sf operation of <b>dist</b> at <b>x</b>. */
+double
+dist_sf(const struct dist *dist, double x)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->sf(dist, x);
+}
+
+/** Evaluate the icdf operation of <b>dist</b> at probability <b>p</b>. */
+double
+dist_icdf(const struct dist *dist, double p)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->icdf(dist, p);
+}
+
+/** Evaluate the isf operation of <b>dist</b> at probability <b>p</b>. */
+double
+dist_isf(const struct dist *dist, double p)
+{
+  const struct dist_ops *const ops = dist->ops;
+
+  return ops->isf(dist, p);
+}
+
+/** Functions for uniform distribution */
+
+/** Draw one sample from the uniform distribution <b>dist</b>. */
+static double
+uniform_sample(const struct dist *dist)
+{
+  const struct uniform *u =
+    const_container_of(dist, struct uniform, base);
+  const double p0 = random_uniform_01();
+
+  return sample_uniform_interval(p0, u->a, u->b);
+}
+
+/** Return the CDF of the uniform distribution <b>dist</b> at <b>x</b>:
+ * 0 below a, 1 from b onwards, and linear in between. */
+static double
+uniform_cdf(const struct dist *dist, double x)
+{
+  const struct uniform *u =
+    const_container_of(dist, struct uniform, base);
+
+  if (x < u->a)
+    return 0;
+  /* Written as a negated comparison so that anything that is not
+   * strictly below b lands here. */
+  if (!(x < u->b))
+    return 1;
+  return (x - u->a)/(u->b - u->a);
+}
+
+/** Return the survival function of the uniform distribution <b>dist</b>
+ * at <b>x</b>: 0 above b, 1 up to a, and linear in between. */
+static double
+uniform_sf(const struct dist *dist, double x)
+{
+  const struct uniform *u =
+    const_container_of(dist, struct uniform, base);
+
+  if (x > u->b)
+    return 0;
+  /* Written as a negated comparison so that anything that is not
+   * strictly above a lands here. */
+  if (!(x > u->a))
+    return 1;
+  return (u->b - x)/(u->b - u->a);
+}
+
+/** Return the point of the uniform distribution <b>dist</b> whose CDF is
+ * <b>p</b>. */
+static double
+uniform_icdf(const struct dist *dist, double p)
+{
+  const struct uniform *u =
+    const_container_of(dist, struct uniform, base);
+  const double width = u->b - u->a;
+
+  /* Both forms are algebraically a + width*p; each one measures from
+   * the nearer endpoint. */
+  if (p < 0.5)
+    return u->a + width*p;
+  else
+    return u->b - width*(1 - p);
+}
+
+/** Return the point of the uniform distribution <b>dist</b> whose
+ * survival function is <b>p</b>. */
+static double
+uniform_isf(const struct dist *dist, double p)
+{
+  const struct uniform *u =
+    const_container_of(dist, struct uniform, base);
+  const double width = u->b - u->a;
+
+  /* Both forms are algebraically b - width*p; each one measures from
+   * the nearer endpoint. */
+  if (p < 0.5)
+    return u->b - width*p;
+  else
+    return u->a + width*(1 - p);
+}
+
+/** Operations table for the uniform distribution. */
+const struct dist_ops uniform_ops = {
+  .name = "uniform",
+  .sample = uniform_sample,
+  .cdf = uniform_cdf,
+  .sf = uniform_sf,
+  .icdf = uniform_icdf,
+  .isf = uniform_isf,
+};
+
+/** Functions for logistic distribution: */
+
+/** Draw one sample from the logistic distribution <b>dist</b>. */
+static double
+logistic_sample(const struct dist *dist)
+{
+  const struct logistic *lg =
+    const_container_of(dist, struct logistic, base);
+  /* Three random inputs, drawn in exactly this order. */
+  const uint32_t s = crypto_rand_u32();
+  const double t = random_uniform_01();
+  const double p0 = random_uniform_01();
+
+  return sample_logistic_locscale(s, t, p0, lg->mu, lg->sigma);
+}
+
+/** Evaluate cdf_logistic() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+logistic_cdf(const struct dist *dist, double x)
+{
+  const struct logistic *lg =
+    const_container_of(dist, struct logistic, base);
+
+  return cdf_logistic(x, lg->mu, lg->sigma);
+}
+
+/** Evaluate sf_logistic() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+logistic_sf(const struct dist *dist, double x)
+{
+  const struct logistic *lg =
+    const_container_of(dist, struct logistic, base);
+
+  return sf_logistic(x, lg->mu, lg->sigma);
+}
+
+/** Evaluate icdf_logistic() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+logistic_icdf(const struct dist *dist, double p)
+{
+  const struct logistic *lg =
+    const_container_of(dist, struct logistic, base);
+
+  return icdf_logistic(p, lg->mu, lg->sigma);
+}
+
+/** Evaluate isf_logistic() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+logistic_isf(const struct dist *dist, double p)
+{
+  const struct logistic *lg =
+    const_container_of(dist, struct logistic, base);
+
+  return isf_logistic(p, lg->mu, lg->sigma);
+}
+
+/** Operations table for the logistic distribution. */
+const struct dist_ops logistic_ops = {
+  .name = "logistic",
+  .sample = logistic_sample,
+  .cdf = logistic_cdf,
+  .sf = logistic_sf,
+  .icdf = logistic_icdf,
+  .isf = logistic_isf,
+};
+
+/** Functions for log-logistic distribution: */
+
+/** Draw one sample from the log-logistic distribution <b>dist</b>. */
+static double
+log_logistic_sample(const struct dist *dist)
+{
+  const struct log_logistic *ll =
+    const_container_of(dist, struct log_logistic, base);
+  /* Two random inputs, drawn in exactly this order. */
+  const uint32_t s = crypto_rand_u32();
+  const double p0 = random_uniform_01();
+
+  return sample_log_logistic_scaleshape(s, p0, ll->alpha, ll->beta);
+}
+
+/** Evaluate cdf_log_logistic() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+log_logistic_cdf(const struct dist *dist, double x)
+{
+  const struct log_logistic *ll =
+    const_container_of(dist, struct log_logistic, base);
+
+  return cdf_log_logistic(x, ll->alpha, ll->beta);
+}
+
+/** Evaluate sf_log_logistic() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+log_logistic_sf(const struct dist *dist, double x)
+{
+  const struct log_logistic *ll =
+    const_container_of(dist, struct log_logistic, base);
+
+  return sf_log_logistic(x, ll->alpha, ll->beta);
+}
+
+/** Evaluate icdf_log_logistic() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+log_logistic_icdf(const struct dist *dist, double p)
+{
+  const struct log_logistic *ll =
+    const_container_of(dist, struct log_logistic, base);
+
+  return icdf_log_logistic(p, ll->alpha, ll->beta);
+}
+
+/** Evaluate isf_log_logistic() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+log_logistic_isf(const struct dist *dist, double p)
+{
+  const struct log_logistic *ll =
+    const_container_of(dist, struct log_logistic, base);
+
+  return isf_log_logistic(p, ll->alpha, ll->beta);
+}
+
+/** Operations table for the log-logistic distribution. */
+const struct dist_ops log_logistic_ops = {
+  .name = "log logistic",
+  .sample = log_logistic_sample,
+  .cdf = log_logistic_cdf,
+  .sf = log_logistic_sf,
+  .icdf = log_logistic_icdf,
+  .isf = log_logistic_isf,
+};
+
+/** Functions for Weibull distribution */
+
+/** Draw one sample from the Weibull distribution <b>dist</b>. */
+static double
+weibull_sample(const struct dist *dist)
+{
+  const struct weibull *wb =
+    const_container_of(dist, struct weibull, base);
+  /* Two random inputs, drawn in exactly this order. */
+  const uint32_t s = crypto_rand_u32();
+  const double p0 = random_uniform_01();
+
+  return sample_weibull(s, p0, wb->lambda, wb->k);
+}
+
+/** Evaluate cdf_weibull() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+weibull_cdf(const struct dist *dist, double x)
+{
+  const struct weibull *wb =
+    const_container_of(dist, struct weibull, base);
+
+  return cdf_weibull(x, wb->lambda, wb->k);
+}
+
+/** Evaluate sf_weibull() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+weibull_sf(const struct dist *dist, double x)
+{
+  const struct weibull *wb =
+    const_container_of(dist, struct weibull, base);
+
+  return sf_weibull(x, wb->lambda, wb->k);
+}
+
+/** Evaluate icdf_weibull() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+weibull_icdf(const struct dist *dist, double p)
+{
+  const struct weibull *wb =
+    const_container_of(dist, struct weibull, base);
+
+  return icdf_weibull(p, wb->lambda, wb->k);
+}
+
+/** Evaluate isf_weibull() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+weibull_isf(const struct dist *dist, double p)
+{
+  const struct weibull *wb =
+    const_container_of(dist, struct weibull, base);
+
+  return isf_weibull(p, wb->lambda, wb->k);
+}
+
+/** Operations table for the Weibull distribution. */
+const struct dist_ops weibull_ops = {
+  .name = "Weibull",
+  .sample = weibull_sample,
+  .cdf = weibull_cdf,
+  .sf = weibull_sf,
+  .icdf = weibull_icdf,
+  .isf = weibull_isf,
+};
+
+/** Functions for generalized Pareto distributions */
+
+/** Draw one sample from the generalized Pareto distribution
+ * <b>dist</b>. */
+static double
+genpareto_sample(const struct dist *dist)
+{
+  const struct genpareto *gp =
+    const_container_of(dist, struct genpareto, base);
+  /* Two random inputs, drawn in exactly this order. */
+  const uint32_t s = crypto_rand_u32();
+  const double p0 = random_uniform_01();
+
+  return sample_genpareto_locscale(s, p0, gp->mu, gp->sigma, gp->xi);
+}
+
+/** Evaluate cdf_genpareto() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+genpareto_cdf(const struct dist *dist, double x)
+{
+  const struct genpareto *gp =
+    const_container_of(dist, struct genpareto, base);
+
+  return cdf_genpareto(x, gp->mu, gp->sigma, gp->xi);
+}
+
+/** Evaluate sf_genpareto() at <b>x</b> with the parameters of
+ * <b>dist</b>. */
+static double
+genpareto_sf(const struct dist *dist, double x)
+{
+  const struct genpareto *gp =
+    const_container_of(dist, struct genpareto, base);
+
+  return sf_genpareto(x, gp->mu, gp->sigma, gp->xi);
+}
+
+/** Evaluate icdf_genpareto() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+genpareto_icdf(const struct dist *dist, double p)
+{
+  const struct genpareto *gp =
+    const_container_of(dist, struct genpareto, base);
+
+  return icdf_genpareto(p, gp->mu, gp->sigma, gp->xi);
+}
+
+/** Evaluate isf_genpareto() at <b>p</b> with the parameters of
+ * <b>dist</b>. */
+static double
+genpareto_isf(const struct dist *dist, double p)
+{
+  const struct genpareto *gp =
+    const_container_of(dist, struct genpareto, base);
+
+  return isf_genpareto(p, gp->mu, gp->sigma, gp->xi);
+}
+
+/** Operations table for the generalized Pareto distribution. */
+const struct dist_ops genpareto_ops = {
+  .name = "generalized Pareto",
+  .sample = genpareto_sample,
+  .cdf = genpareto_cdf,
+  .sf = genpareto_sf,
+  .icdf = genpareto_icdf,
+  .isf = genpareto_isf,
+};
+
+/** Functions for geometric distribution on number of trials before success */
+
+/** Draw one sample from the geometric distribution <b>dist</b>. */
+static double
+geometric_sample(const struct dist *dist)
+{
+  const struct geometric *g =
+    const_container_of(dist, struct geometric, base);
+  /* Two random inputs, drawn in exactly this order. */
+  const uint32_t s = crypto_rand_u32();
+  const double p0 = random_uniform_01();
+
+  return sample_geometric(s, p0, g->p);
+}
+
+/** Return the CDF of the geometric distribution <b>dist</b> at <b>x</b>;
+ * it is 0 for every x below 1. */
+static double
+geometric_cdf(const struct dist *dist, double x)
+{
+  const struct geometric *g =
+    const_container_of(dist, struct geometric, base);
+
+  /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */
+  return (x < 1 ? 0 : -expm1(floor(x)*log1p(-g->p)));
+}
+
+/**
+ * Survival function of the geometric distribution <b>dist</b>: the
+ * probability that a sample is strictly greater than <b>x</b>, that is,
+ * 1 - geometric_cdf(dist, x).
+ */
+static double
+geometric_sf(const struct dist *dist, double x)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+
+  /* The CDF is 0 for x below 1, so the survival function must be 1
+   * there: every sample exceeds such an x. */
+  if (x < 1)
+    return 1;
+  /* (1 - p)^floor(x) = e^{floor(x) log(1 - p)} */
+  return exp(floor(x)*log1p(-G->p));
+}
+
+/** Return log(1 - p)/log(1 - G->p) for the geometric distribution
+ * <b>dist</b>.  The quotient is returned as is, without rounding to a
+ * whole number of trials. */
+static double
+geometric_icdf(const struct dist *dist, double p)
+{
+  const struct geometric *g =
+    const_container_of(dist, struct geometric, base);
+  const double numer = log1p(-p);
+
+  return numer/log1p(-g->p);
+}
+
+/** Return log(p)/log(1 - G->p) for the geometric distribution
+ * <b>dist</b>.  The quotient is returned as is, without rounding to a
+ * whole number of trials. */
+static double
+geometric_isf(const struct dist *dist, double p)
+{
+  const struct geometric *g =
+    const_container_of(dist, struct geometric, base);
+  const double numer = log(p);
+
+  return numer/log1p(-g->p);
+}
+
+/** Operations table for the (1-based) geometric distribution. */
+const struct dist_ops geometric_ops = {
+  .name = "geometric (1-based)",
+  .sample = geometric_sample,
+  .cdf = geometric_cdf,
+  .sf = geometric_sf,
+  .icdf = geometric_icdf,
+  .isf = geometric_isf,
+};
diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h
new file mode 100644
index 0000000000..66acb796fd
--- /dev/null
+++ b/src/lib/math/prob_distr.h
@@ -0,0 +1,158 @@
+
+/**
+ * \file prob_distr.h
+ *
+ * \brief Header for prob_distr.c
+ **/
+
+#ifndef TOR_PROB_DISTR_H
+#define TOR_PROB_DISTR_H
+
+#include "lib/cc/compat_compiler.h"
+#include "lib/cc/torint.h"
+#include "lib/testsupport/testsupport.h"
+
+/**
+ * Container for distribution parameters for sampling, CDF, &c.
+ *
+ * Each concrete distribution structure in this header embeds one of
+ * these as its <b>base</b> member; the dist_*() functions dispatch
+ * through <b>ops</b>.
+ */
+struct dist {
+  /** Table of operations implementing this particular distribution. */
+  const struct dist_ops *ops;
+};
+
+/** Initializer for a struct dist whose operations table is <b>OPS</b>. */
+#define DIST_BASE(OPS)  { .ops = (OPS) }
+/**
+ * Like DIST_BASE(), but additionally makes the compiler check the type
+ * of <b>OBJ</b>: the pointer subtraction inside sizeof is only
+ * well-formed when &OBJ is compatible with `const TYPE *'.  The sizeof
+ * result is multiplied by zero, so the pointer handed to DIST_BASE() is
+ * still just <b>OPS</b>.
+ */
+#define DIST_BASE_TYPED(OPS, OBJ, TYPE)                         \
+  DIST_BASE((OPS) + 0*sizeof(&(OBJ) - (const TYPE *)&(OBJ)))
+
+/* Generic entry points: each one forwards to the matching member of the
+ * distribution's dist_ops table. */
+const char *dist_name(const struct dist *);
+double dist_sample(const struct dist *);
+double dist_cdf(const struct dist *, double x);
+double dist_sf(const struct dist *, double x);
+double dist_icdf(const struct dist *, double p);
+double dist_isf(const struct dist *, double p);
+
+/**
+ * Table of operations for one kind of distribution.  Every function
+ * receives the struct dist that is embedded in the concrete distribution
+ * object.
+ */
+struct dist_ops {
+  /** Name of the distribution, as returned by dist_name(). */
+  const char *name;
+  /** Draw a sample. */
+  double (*sample)(const struct dist *);
+  /** Cumulative distribution function at <b>x</b>. */
+  double (*cdf)(const struct dist *, double x);
+  /** Survival function at <b>x</b>. */
+  double (*sf)(const struct dist *, double x);
+  /** Inverse of the CDF at probability <b>p</b>. */
+  double (*icdf)(const struct dist *, double p);
+  /** Inverse of the survival function at probability <b>p</b>. */
+  double (*isf)(const struct dist *, double p);
+};
+
+/* Geometric distribution on positive number of trials before first success */
+
+struct geometric {
+  struct dist base; /* embedded struct dist; GEOMETRIC() initializes it */
+  double p; /* success probability */
+};
+
+/* Operations table for struct geometric. */
+extern const struct dist_ops geometric_ops;
+
+/* Initializer for the base member of the struct geometric OBJ. */
+#define GEOMETRIC(OBJ)                                      \
+  DIST_BASE_TYPED(&geometric_ops, OBJ, struct geometric)
+
+/* Generalized Pareto distribution */
+
+struct genpareto {
+  struct dist base; /* embedded struct dist; GENPARETO() initializes it */
+  double mu; /* translation (location) */
+  double sigma; /* scale */
+  double xi; /* shape */
+};
+
+/* Operations table for struct genpareto. */
+extern const struct dist_ops genpareto_ops;
+
+/* Initializer for the base member of the struct genpareto OBJ. */
+#define GENPARETO(OBJ)                                      \
+  DIST_BASE_TYPED(&genpareto_ops, OBJ, struct genpareto)
+
+/* Weibull distribution */
+
+struct weibull {
+  struct dist base; /* embedded struct dist; WEIBULL() initializes it */
+  double lambda; /* NOTE(review): presumably the scale -- confirm */
+  double k; /* NOTE(review): presumably the shape -- confirm */
+};
+
+/* Operations table for struct weibull. */
+extern const struct dist_ops weibull_ops;
+
+/* Initializer for the base member of the struct weibull OBJ. */
+#define WEIBULL(OBJ)                                    \
+  DIST_BASE_TYPED(&weibull_ops, OBJ, struct weibull)
+
+/* Log-logistic distribution */
+
+struct log_logistic {
+  struct dist base; /* embedded struct dist; LOG_LOGISTIC() initializes it */
+  double alpha; /* NOTE(review): presumably the scale -- confirm */
+  double beta; /* NOTE(review): presumably the shape -- confirm */
+};
+
+/* Operations table for struct log_logistic. */
+extern const struct dist_ops log_logistic_ops;
+
+/* Initializer for the base member of the struct log_logistic OBJ. */
+#define LOG_LOGISTIC(OBJ)                                       \
+  DIST_BASE_TYPED(&log_logistic_ops, OBJ, struct log_logistic)
+
+/* Logistic distribution */
+
+struct logistic {
+  struct dist base; /* embedded struct dist; LOGISTIC() initializes it */
+  double mu; /* NOTE(review): presumably the location -- confirm */
+  double sigma; /* NOTE(review): presumably the scale -- confirm */
+};
+
+/* Operations table for struct logistic. */
+extern const struct dist_ops logistic_ops;
+
+/* Initializer for the base member of the struct logistic OBJ. */
+#define LOGISTIC(OBJ)                                   \
+  DIST_BASE_TYPED(&logistic_ops, OBJ, struct logistic)
+
+/* Uniform distribution */
+
+struct uniform {
+  struct dist base; /* embedded struct dist; UNIFORM() initializes it */
+  double a; /* lower end: the CDF is 0 below this point */
+  double b; /* upper end: the CDF is 1 from this point on */
+};
+
+/* Operations table for struct uniform. */
+extern const struct dist_ops uniform_ops;
+
+/* Initializer for the base member of the struct uniform OBJ. */
+#define UNIFORM(OBJ)                                    \
+  DIST_BASE_TYPED(&uniform_ops, OBJ, struct uniform)
+
+/** Internal functions, declared here only for use by the unit tests. */
+
+#ifdef PROB_DISTR_PRIVATE
+
+STATIC double logithalf(double p0);
+STATIC double logit(double p);
+
+STATIC double random_uniform_01(void);
+
+STATIC double logistic(double x);
+STATIC double cdf_logistic(double x, double mu, double sigma);
+STATIC double sf_logistic(double x, double mu, double sigma);
+STATIC double icdf_logistic(double p, double mu, double sigma);
+STATIC double isf_logistic(double p, double mu, double sigma);
+STATIC double sample_logistic(uint32_t s, double t, double p0);
+
+STATIC double cdf_log_logistic(double x, double alpha, double beta);
+STATIC double sf_log_logistic(double x, double alpha, double beta);
+STATIC double icdf_log_logistic(double p, double alpha, double beta);
+STATIC double isf_log_logistic(double p, double alpha, double beta);
+STATIC double sample_log_logistic(uint32_t s, double p0);
+
+STATIC double cdf_weibull(double x, double lambda, double k);
+STATIC double sf_weibull(double x, double lambda, double k);
+STATIC double icdf_weibull(double p, double lambda, double k);
+STATIC double isf_weibull(double p, double lambda, double k);
+STATIC double sample_weibull(uint32_t s, double p0, double lambda, double k);
+
+STATIC double sample_uniform_interval(double p0, double a, double b);
+
+STATIC double cdf_genpareto(double x, double mu, double sigma, double xi);
+STATIC double sf_genpareto(double x, double mu, double sigma, double xi);
+STATIC double icdf_genpareto(double p, double mu, double sigma, double xi);
+STATIC double isf_genpareto(double p, double mu, double sigma, double xi);
+STATIC double sample_genpareto(uint32_t s, double p0, double xi);
+
+#endif
+
+#endif
diff --git a/src/lib/smartlist_core/smartlist_foreach.h b/src/lib/smartlist_core/smartlist_foreach.h
index 54f08ac47d..c9afebd6a2 100644
--- a/src/lib/smartlist_core/smartlist_foreach.h
+++ b/src/lib/smartlist_core/smartlist_foreach.h
@@ -83,6 +83,19 @@
          ++var ## _sl_idx) {                                    \
       var = (sl)->list[var ## _sl_idx];
 
+/** Iterate over the items in smartlist <b>sl</b> in reverse order (from the
+ * last element down to the first), assigning each one in turn to a variable
+ * <b>var</b> of type <b>type</b>.  Otherwise similar to
+ * SMARTLIST_FOREACH_BEGIN.
+ *
+ * The length of <b>sl</b> is read once, before the first iteration.
+ *
+ * NOTE: This macro is incompatible with SMARTLIST_DEL_CURRENT.
+ */
+#define SMARTLIST_FOREACH_REVERSE_BEGIN(sl, type, var)  \
+  STMT_BEGIN                                                       \
+    int var ## _sl_idx, var ## _sl_len=(sl)->num_used;             \
+    type var;                                                      \
+    for (var ## _sl_idx = var ## _sl_len-1; var ## _sl_idx >= 0;   \
+         --var ## _sl_idx) {                                       \
+      var = (sl)->list[var ## _sl_idx];
+
 #define SMARTLIST_FOREACH_END(var)              \
     var = NULL;                                 \
     (void) var ## _sl_idx;                      \
diff --git a/src/lib/time/.may_include b/src/lib/time/.may_include
index 40a18805ac..ae01431b60 100644
--- a/src/lib/time/.may_include
+++ b/src/lib/time/.may_include
@@ -7,6 +7,7 @@ lib/log/*.h
 lib/subsys/*.h
 lib/time/*.h
 lib/wallclock/*.h
+lib/defs/time.h
 
 # For load_windows_system_lib.
 lib/fs/winlib.h
 \ No newline at end of file
diff --git a/src/lib/time/compat_time.c b/src/lib/time/compat_time.c
index f1ddb4fdc4..387b0fad22 100644
--- a/src/lib/time/compat_time.c
+++ b/src/lib/time/compat_time.c
@@ -787,8 +787,8 @@ monotime_absolute_nsec(void)
   return monotime_diff_nsec(&initialized_at, &now);
 }
 
-uint64_t
-monotime_absolute_usec(void)
+MOCK_IMPL(uint64_t,
+monotime_absolute_usec,(void))
 {
   return monotime_absolute_nsec() / 1000;
 }
diff --git a/src/lib/time/compat_time.h b/src/lib/time/compat_time.h
index c5337e9998..3c8797c450 100644
--- a/src/lib/time/compat_time.h
+++ b/src/lib/time/compat_time.h
@@ -199,7 +199,7 @@ uint64_t monotime_absolute_nsec(void);
 /**
  * Return the number of microseconds since the timer system was initialized.
  */
-uint64_t monotime_absolute_usec(void);
+MOCK_DECL(uint64_t, monotime_absolute_usec,(void));
 /**
  * Return the number of milliseconds since the timer system was initialized.
  */
diff --git a/src/lib/time/tvdiff.c b/src/lib/time/tvdiff.c
index bc8a1166e7..9dfb63c26f 100644
--- a/src/lib/time/tvdiff.c
+++ b/src/lib/time/tvdiff.c
@@ -11,6 +11,7 @@
 #include "lib/time/tvdiff.h"
 
 #include "lib/cc/compat_compiler.h"
+#include "lib/defs/time.h"
 #include "lib/log/log.h"
 
 #ifdef _WIN32
@@ -20,8 +21,6 @@
 #include <sys/time.h>
 #endif
 
-#define TOR_USEC_PER_SEC 1000000
-
 /** Return the difference between start->tv_sec and end->tv_sec.
  * Returns INT64_MAX on overflow and underflow.
  */
diff --git a/src/rust/protover/protover.rs b/src/rust/protover/protover.rs
index 8624afeafa..0b2a78c210 100644
--- a/src/rust/protover/protover.rs
+++ b/src/rust/protover/protover.rs
@@ -46,6 +46,7 @@ pub enum Protocol {
     LinkAuth,
     Microdesc,
     Relay,
+    Padding,
 }
 
 impl fmt::Display for Protocol {
@@ -73,6 +74,7 @@ impl FromStr for Protocol {
             "LinkAuth" => Ok(Protocol::LinkAuth),
             "Microdesc" => Ok(Protocol::Microdesc),
             "Relay" => Ok(Protocol::Relay),
+            "Padding" => Ok(Protocol::Padding),
             _ => Err(ProtoverError::UnknownProtocol),
         }
     }
@@ -163,7 +165,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr {
              Link=1-5 \
              LinkAuth=3 \
              Microdesc=1-2 \
-             Relay=1-2"
+             Relay=1-2 \
+             Padding=1"
         )
     } else {
         cstr!(
@@ -176,7 +179,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr {
              Link=1-5 \
              LinkAuth=1,3 \
              Microdesc=1-2 \
-             Relay=1-2"
+             Relay=1-2 \
+             Padding=1"
         )
     }
 }
diff --git a/src/test/Makefile.nmake b/src/test/Makefile.nmake
index aa16a22b52..ca6a84cf8a 100644
--- a/src/test/Makefile.nmake
+++ b/src/test/Makefile.nmake
@@ -19,6 +19,7 @@ TEST_OBJECTS = test.obj test_addr.obj test_channel.obj test_channeltls.obj \
 	test_cell_formats.obj test_relay.obj test_replay.obj \
 	test_channelpadding.obj \
 	test_circuitstats.obj \
+	test_circuitpadding.obj \
 	test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj
 
 tinytest.obj: ..\ext\tinytest.c
diff --git a/src/test/include.am b/src/test/include.am
index 4725e8cbaa..b276500fd5 100644
--- a/src/test/include.am
+++ b/src/test/include.am
@@ -101,6 +101,7 @@ src_test_test_SOURCES += \
 	src/test/test_cell_queue.c \
 	src/test/test_channel.c \
 	src/test/test_channelpadding.c \
+	src/test/test_circuitpadding.c \
 	src/test/test_channeltls.c \
 	src/test/test_checkdir.c \
 	src/test/test_circuitlist.c \
@@ -156,6 +157,7 @@ src_test_test_SOURCES += \
 	src/test/test_periodic_event.c \
 	src/test/test_policy.c \
 	src/test/test_process.c \
+	src/test/test_prob_distr.c \
 	src/test/test_procmon.c \
 	src/test/test_proto_http.c \
 	src/test/test_proto_misc.c \
@@ -206,6 +208,7 @@ src_test_test_slow_SOURCES += \
 	src/test/test_slow.c \
 	src/test/test_crypto_slow.c \
 	src/test/test_process_slow.c \
+	src/test/test_prob_distr.c \
 	src/test/testing_common.c \
 	src/test/testing_rsakeys.c \
 	src/ext/tinytest.c
diff --git a/src/test/prob_distr_mpfr_ref.c b/src/test/prob_distr_mpfr_ref.c
new file mode 100644
index 0000000000..4e64d731cd
--- /dev/null
+++ b/src/test/prob_distr_mpfr_ref.c
@@ -0,0 +1,64 @@
+/* Copyright 2012-2018, The Tor Project, Inc
+ * See LICENSE for licensing information */
+
+/** prob_distr_mpfr_ref.c
+ *
+ * Example reference file for GNU MPFR vectors tested in test_prob_distr.c .
+ * Code by Riastradh.
+ */
+
+#include <complex.h>
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+/* Must come after <stdio.h> so we get mpfr_printf.  */
+#include <mpfr.h>
+
+/*  gcc -o mpfr prob_distr_mpfr_ref.c -lmpfr -lm */
+
+/* Computes logit(p) for p = .49999 with 200-bit MPFR arithmetic, then
+ * prints several double-precision evaluations of the same quantity for
+ * comparison.  Returns nonzero if writing to stdout failed. */
+int
+main(void)
+{
+  mpfr_t p, q, r;
+  mpfr_init(p);
+  mpfr_set_prec(p, 200);
+  mpfr_init(q);
+  mpfr_set_prec(q, 200);
+  mpfr_init(r);
+  mpfr_set_prec(r, 200);
+  mpfr_set_d(p, .49999, MPFR_RNDN);
+  mpfr_set_d(q, 1, MPFR_RNDN);
+  /* r := q - p = 1 - p */
+  mpfr_sub(r, q, p, MPFR_RNDN);
+  /* q := p/r = p/(1 - p) */
+  mpfr_div(q, p, r, MPFR_RNDN);
+  /* r := log(q) = log(p/(1 - p)) */
+  mpfr_log(r, q, MPFR_RNDN);
+  mpfr_printf("mpfr 200-bit\t%.128Rg\n", r);
+
+  /*
+   * Print a double approximation to logit three different ways.  All
+   * three agree bit for bit on the libms I tried, with the nextafter
+   * adjustment (which is well within the 10 eps relative error bound
+   * advertised).  Apparently I must have used the Goldberg expression
+   * for what I wrote down in the test case.
+   */
+  /* Exactly one value for the single %.17g conversion. */
+  printf("mpfr 53-bit\t%.17g\n", nextafter(mpfr_get_d(r, MPFR_RNDN), 0));
+  volatile double p0 = .49999;
+  printf("log1p\t\t%.17g\n", nextafter(-log1p((1 - 2*p0)/p0), 0));
+  volatile double x = (1 - 2*p0)/p0;
+  volatile double xp1 = x + 1;
+  printf("Goldberg\t%.17g\n", -x*log(xp1)/(xp1 - 1));
+
+  /*
+   * Print a bad approximation, using the naive expression, to see a
+   * lot of wrong digits, far beyond the 10 eps relative error attained
+   * by -log1p((1 - 2*p)/p).
+   */
+  printf("naive\t\t%.17g\n", log(p0/(1 - p0)));
+
+  /* Release the MPFR variables now that nothing reads them any more. */
+  mpfr_clear(p);
+  mpfr_clear(q);
+  mpfr_clear(r);
+
+  fflush(stdout);
+  return ferror(stdout);
+}
diff --git a/src/test/test.c b/src/test/test.c
index 13e8c71709..902565dfbe 100644
--- a/src/test/test.c
+++ b/src/test/test.c
@@ -845,6 +845,7 @@ struct testgroup_t testgroups[] = {
   { "channeltls/", channeltls_tests },
   { "checkdir/", checkdir_tests },
   { "circuitbuild/", circuitbuild_tests },
+  { "circuitpadding/", circuitpadding_tests },
   { "circuitlist/", circuitlist_tests },
   { "circuitmux/", circuitmux_tests },
   { "circuitstats/", circuitstats_tests },
@@ -900,6 +901,7 @@ struct testgroup_t testgroups[] = {
   { "parsecommon/", parsecommon_tests },
   { "periodic-event/" , periodic_event_tests },
   { "policy/" , policy_tests },
+  { "prob_distr/", prob_distr_tests },
   { "procmon/", procmon_tests },
   { "process/", process_tests },
   { "proto/http/", proto_http_tests },
diff --git a/src/test/test.h b/src/test/test.h
index 9f754469c8..39953e9f7e 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -187,6 +187,7 @@ extern struct testcase_t cell_format_tests[];
 extern struct testcase_t cell_queue_tests[];
 extern struct testcase_t channel_tests[];
 extern struct testcase_t channelpadding_tests[];
+extern struct testcase_t circuitpadding_tests[];
 extern struct testcase_t channeltls_tests[];
 extern struct testcase_t checkdir_tests[];
 extern struct testcase_t circuitbuild_tests[];
@@ -242,6 +243,8 @@ extern struct testcase_t parsecommon_tests[];
 extern struct testcase_t pem_tests[];
 extern struct testcase_t periodic_event_tests[];
 extern struct testcase_t policy_tests[];
+extern struct testcase_t prob_distr_tests[];
+extern struct testcase_t slow_stochastic_prob_distr_tests[];
 extern struct testcase_t procmon_tests[];
 extern struct testcase_t process_tests[];
 extern struct testcase_t proto_http_tests[];
diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c
new file mode 100644
index 0000000000..f4d003969e
--- /dev/null
+++ b/src/test/test_circuitpadding.c
@@ -0,0 +1,2356 @@
+#define TOR_CHANNEL_INTERNAL_
+#define TOR_TIMERS_PRIVATE
+#define CIRCUITPADDING_PRIVATE
+#define NETWORKSTATUS_PRIVATE
+
+#include "core/or/or.h"
+#include "test.h"
+#include "lib/testsupport/testsupport.h"
+#include "core/or/connection_or.h"
+#include "core/or/channel.h"
+#include "core/or/channeltls.h"
+#include <event.h>
+#include "lib/evloop/compat_libevent.h"
+#include "lib/time/compat_time.h"
+#include "lib/defs/time.h"
+#include "core/or/relay.h"
+#include "core/or/circuitlist.h"
+#include "core/or/circuitbuild.h"
+#include "core/or/circuitpadding.h"
+#include "core/crypto/relay_crypto.h"
+#include "core/or/protover.h"
+#include "feature/nodelist/nodelist.h"
+#include "lib/evloop/compat_libevent.h"
+#include "app/config/config.h"
+
+#include "feature/nodelist/routerstatus_st.h"
+#include "feature/nodelist/networkstatus_st.h"
+#include "feature/nodelist/node_st.h"
+#include "core/or/cell_st.h"
+#include "core/or/crypt_path_st.h"
+#include "core/or/or_circuit_st.h"
+#include "core/or/origin_circuit_st.h"
+
+extern smartlist_t *connection_array;
+
+circid_t get_unique_circ_id_by_chan(channel_t *chan);
+void helper_create_basic_machine(void);
+static void helper_create_conditional_machines(void);
+
+static or_circuit_t * new_fake_orcirc(channel_t *nchan, channel_t *pchan);
+channel_t *new_fake_channel(void);
+void test_circuitpadding_negotiation(void *arg);
+void test_circuitpadding_wronghop(void *arg);
+void test_circuitpadding_conditions(void *arg);
+
+void test_circuitpadding_serialize(void *arg);
+void test_circuitpadding_rtt(void *arg);
+void test_circuitpadding_tokens(void *arg);
+void test_circuitpadding_circuitsetup_machine(void *arg);
+
+static void
+simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay,
+                           int padding);
+void free_fake_orcirc(circuit_t *circ);
+void free_fake_origin_circuit(origin_circuit_t *circ);
+
+/* When nonzero, circuit_package_relay_cell_mock() hands PADDING_NEGOTIATED
+ * cells sent on relay_side over to client_side; when zero it drops them. */
+static int deliver_negotiated = 1;
+/* Current mocked time in nanoseconds; see timers_advance_and_run(). */
+static int64_t curr_mocked_time;
+
+/* Fake nodes handed out by node_get_by_id_mock().  nodes_init() marks the
+ * first as supporting padding and the second as not supporting it. */
+static node_t padding_node;
+static node_t non_padding_node;
+
+/* Channel object shared by the fake circuits in these tests. */
+static channel_t dummy_channel;
+/* Padding machine installed on the circuits under test.
+ * NOTE(review): presumably filled in by helper_create_basic_machine() --
+ * confirm. */
+static circpad_machine_spec_t circ_client_machine;
+
+/** Move the mocked clocks forward by <b>msec_update</b> milliseconds and
+ * then run whatever timers are pending. */
+static void
+timers_advance_and_run(int64_t msec_update)
+{
+  curr_mocked_time = curr_mocked_time + msec_update*TOR_NSEC_PER_MSEC;
+
+  /* Both mocked clocks are set to the same instant. */
+  monotime_coarse_set_mock_time_nsec(curr_mocked_time);
+  monotime_set_mock_time_nsec(curr_mocked_time);
+
+  timers_run_pending();
+}
+
+/** Give both fake nodes a zeroed routerstatus, flagging only
+ * padding_node as supporting padding. */
+static void
+nodes_init(void)
+{
+  routerstatus_t *with_padding = tor_malloc_zero(sizeof(*with_padding));
+  with_padding->pv.supports_padding = 1;
+  padding_node.rs = with_padding;
+
+  routerstatus_t *without_padding =
+    tor_malloc_zero(sizeof(*without_padding));
+  without_padding->pv.supports_padding = 0;
+  non_padding_node.rs = without_padding;
+}
+
+/** Release the routerstatus objects allocated by nodes_init(). */
+static void
+nodes_free(void)
+{
+  node_t *const fake_nodes[] = { &padding_node, &non_padding_node };
+  unsigned i;
+
+  for (i = 0; i < sizeof(fake_nodes)/sizeof(fake_nodes[0]); ++i)
+    tor_free(fake_nodes[i]->rs);
+}
+
+/** Mock node lookup keyed on the first byte of <b>identity_digest</b>:
+ * 1 selects padding_node, 0 selects non_padding_node, and any other value
+ * yields NULL. */
+static const node_t *
+node_get_by_id_mock(const char *identity_digest)
+{
+  switch (identity_digest[0]) {
+    case 1:
+      return &padding_node;
+    case 0:
+      return &non_padding_node;
+    default:
+      return NULL;
+  }
+}
+
+/**
+ * Allocate a fake OR circuit for the tests and register it with fresh
+ * circuit IDs on <b>nchan</b> (next-hop side) and <b>pchan</b>
+ * (previous-hop side).
+ *
+ * Returns NULL if the relay crypto state cannot be initialized; in that
+ * case nothing has been allocated or registered.  Release a successful
+ * result with free_fake_orcirc().
+ */
+static or_circuit_t *
+new_fake_orcirc(channel_t *nchan, channel_t *pchan)
+{
+  or_circuit_t *orcirc = NULL;
+  circuit_t *circ = NULL;
+  crypt_path_t tmp_cpath;
+  char whatevs_key[CPATH_KEY_MATERIAL_LEN];
+
+  /* The key value does not matter to the tests, but it must not be read
+   * while still uninitialized. */
+  memset(whatevs_key, 0, sizeof(whatevs_key));
+  memset(&tmp_cpath, 0, sizeof(tmp_cpath));
+
+  /* Set up the crypto state before anything else: if this fails there is
+   * no circuit to free and no channel registration to undo. */
+  if (circuit_init_cpath_crypto(&tmp_cpath, whatevs_key,
+                                sizeof(whatevs_key), 0, 0)<0) {
+    log_warn(LD_BUG,"Circuit initialization failed");
+    return NULL;
+  }
+
+  orcirc = tor_malloc_zero(sizeof(*orcirc));
+  circ = &(orcirc->base_);
+  circ->magic = OR_CIRCUIT_MAGIC;
+
+  /* The channel itself is attached by circuit_set_n_circid_chan() below. */
+  circ->n_circ_id = get_unique_circ_id_by_chan(nchan);
+  circ->n_mux = NULL; /* ?? */
+  cell_queue_init(&(circ->n_chan_cells));
+  circ->n_hop = NULL;
+  circ->streams_blocked_on_n_chan = 0;
+  circ->streams_blocked_on_p_chan = 0;
+  circ->n_delete_pending = 0;
+  circ->p_delete_pending = 0;
+  circ->received_destroy = 0;
+  circ->state = CIRCUIT_STATE_OPEN;
+  circ->purpose = CIRCUIT_PURPOSE_OR;
+  circ->package_window = CIRCWINDOW_START_MAX;
+  circ->deliver_window = CIRCWINDOW_START_MAX;
+  circ->n_chan_create_cell = NULL;
+
+  /* Likewise attached by circuit_set_p_circid_chan() below. */
+  orcirc->p_circ_id = get_unique_circ_id_by_chan(pchan);
+  cell_queue_init(&(orcirc->p_chan_cells));
+
+  circuit_set_p_circid_chan(orcirc, orcirc->p_circ_id, pchan);
+  circuit_set_n_circid_chan(circ, circ->n_circ_id, nchan);
+
+  /* Hand the initialized crypto state over to the circuit. */
+  orcirc->crypto = tmp_cpath.crypto;
+
+  return orcirc;
+}
+
+/** Tear down a circuit made by new_fake_orcirc(): clear its relay crypto
+ * state, free its padding machine info, then free the circuit itself. */
+void
+free_fake_orcirc(circuit_t *circ)
+{
+  relay_crypto_clear(&TO_OR_CIRCUIT(circ)->crypto);
+  circpad_circuit_free_all_machineinfos(circ);
+
+  tor_free(circ);
+}
+
+/** Tear down a fake origin circuit: free its padding machine info and
+ * its cpath, then free the circuit itself. */
+void
+free_fake_origin_circuit(origin_circuit_t *circ)
+{
+  circuit_t *const base = TO_CIRCUIT(circ);
+
+  circpad_circuit_free_all_machineinfos(base);
+  circuit_clear_cpath(circ);
+
+  tor_free(circ);
+}
+
+void dummy_nop_timer(void);
+
+//static int dont_stop_libevent = 0;
+
+/* The two ends of the simulated circuit.  circuit_package_relay_cell_mock()
+ * compares the sending circuit against these to decide where a cell goes. */
+static circuit_t *client_side;
+static circuit_t *relay_side;
+
+/* Number of cells the mock has seen packaged on client_side and on
+ * relay_side, respectively. */
+static int n_client_cells = 0;
+static int n_relay_cells = 0;
+
+/* Forward declarations of the mocks defined below. */
+static int
+circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno);
+
+static void
+circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ,
+                               cell_direction_t direction);
+
+/** Mock for circuitmux_attach_circuit() that does nothing at all. */
+static void
+circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ,
+                               cell_direction_t direction)
+{
+  /* Every argument is deliberately ignored. */
+  (void)cmux; (void)circ; (void)direction;
+}
+
+/**
+ * Mock for circuit_package_relay_cell() that short-circuits the network:
+ * a cell packaged on client_side is handed straight to relay_side, and
+ * the other way around.  The first payload byte is treated as the relay
+ * command.  Padding negotiation cells go to the negotiation handlers;
+ * every other cell is reported to the receiving side as a recognized
+ * relay cell.
+ *
+ * Each cell bumps n_client_cells or n_relay_cells, unless one of the
+ * tt_int_op() checks fails first.  Whatever happens, the mocked clock is
+ * advanced by one millisecond and 0 is returned.
+ */
+static int
+circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno)
+{
+  (void)cell; (void)on_stream; (void)filename; (void)lineno;
+
+  if (circ == client_side) {
+    if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) {
+      // Deliver to relay
+      circpad_handle_padding_negotiate(relay_side, cell);
+    } else {
+
+      int is_target_hop = circpad_padding_is_from_expected_hop(circ,
+                                                             layer_hint);
+      // Cells leaving the client must travel outward, towards the hop
+      // that the padding machine is expecting.
+      tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_OUT);
+      tt_int_op(is_target_hop, OP_EQ, 1);
+
+      // No need to pretend a padding cell was sent: This event is
+      // now emitted internally when the circuitpadding code sends them.
+      //circpad_cell_event_padding_sent(client_side);
+
+      // Receive padding cell at middle
+      circpad_deliver_recognized_relay_cell_events(relay_side,
+              cell->payload[0], NULL);
+    }
+    n_client_cells++;
+  } else if (circ == relay_side) {
+    // Cells leaving the relay must travel inward, towards the client.
+    tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_IN);
+
+    if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATED) {
+      // XXX: blah need right layer_hint..
+      // Tests can clear deliver_negotiated to make the reply go missing.
+      if (deliver_negotiated)
+        circpad_handle_padding_negotiated(client_side, cell,
+                                          TO_ORIGIN_CIRCUIT(client_side)
+                                             ->cpath->next);
+    } else if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) {
+      circpad_handle_padding_negotiate(client_side, cell);
+    } else {
+      // No need to pretend a padding cell was sent: This event is
+      // now emitted internally when the circuitpadding code sends them.
+      //circpad_cell_event_padding_sent(relay_side);
+
+      // Receive padding cell at client
+      circpad_deliver_recognized_relay_cell_events(client_side,
+              cell->payload[0],
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+    }
+
+    n_relay_cells++;
+  }
+
+ // A failed tt_int_op() check above jumps straight here.
+ done:
+  timers_advance_and_run(1);
+  return 0;
+}
+
+// Test reading and writing padding to strings (or options_t + consensus)
+// The body is empty for now: nothing is exercised and <b>arg</b> is unused.
+void
+test_circuitpadding_serialize(void *arg)
+{
+  (void)arg;
+}
+
+/** Mock for circpad_send_command_to_hop() that sends nothing and always
+ * returns 0. */
+static signed_error_t
+circpad_send_command_to_hop_mock(origin_circuit_t *circ, uint8_t hopnum,
+                                 uint8_t relay_command, const uint8_t *payload,
+                                 ssize_t payload_len)
+{
+  /* Every argument is deliberately ignored. */
+  (void) circ; (void) hopnum; (void) relay_command;
+  (void) payload; (void) payload_len;
+
+  return 0;
+}
+
+void
+test_circuitpadding_rtt(void *arg)
+{
+  /* Test Plan (NOTE(review): relay_side's machine info below is allocated
+   * with client_side as its circuit argument -- confirm that is intended):
+   * 1. Test RTT measurement server side
+   *    a. test usage of measured RTT
+   * 2. Test termination of RTT measurement
+   *    a. test non-update of RTT
+   * 3. Test client side circuit and non-application of RTT.
+   */
+  circpad_delay_t rtt_estimate;
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+  MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock);
+
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+  helper_create_basic_machine();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,
+                                                                 0);
+
+  relay_side->padding_machine[0] = &circ_client_machine;
+  relay_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,0);
+
+  /* Test 1: Test measuring RTT on the relay side (receive, wait, send) */
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0);
+
+  timers_advance_and_run(20);
+
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 19000);
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 30000);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0);
+  timers_advance_and_run(20);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 20000);
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 21000);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  /* Test 2: Termination of RTT measurement (from the previous test) */
+  tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1);
+  rtt_estimate = relay_side->padding_info[0]->rtt_estimate_usec;
+
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  timers_advance_and_run(4);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_EQ,
+            rtt_estimate);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  /* Test 3: Make sure client side machine properly ignores RTT */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  timers_advance_and_run(20);
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(client_side->padding_info[0]->rtt_estimate_usec, OP_EQ, 0);
+  tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0),
+            OP_NE, client_side->padding_info[0]->rtt_estimate_usec);
+  tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0),
+            OP_EQ,
+            circpad_machine_current_state(
+                client_side->padding_info[0])->start_usec);
+ done: /* NOTE(review): only relay_side is freed here; client_side is not. */
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  timers_shutdown();
+  monotime_disable_test_mocking();
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  tor_free(circ_client_machine.states);
+
+  return; /* NOTE(review): circpad_send_command_to_hop is never UNMOCKed */
+}
+
+void
+helper_create_basic_machine(void)
+{
+  /* Build the basic test machine in circ_client_machine: start, burst */
+  circpad_machine_states_init(&circ_client_machine, 2);
+
+  circ_client_machine.states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL;
+
+  // FIXME: Is this (higher token removal) what we want?
+  circ_client_machine.states[CIRCPAD_STATE_BURST].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_HIGHER;
+
+  // FIXME: Tune this histogram (5 bins holding 1+0+2+2+2 == 7 tokens)
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_len = 5;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 500;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[0] = 1;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[2] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[3] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[4] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_total_tokens = 7;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1;
+
+  return;
+}
+
+#define BIG_HISTOGRAM_LEN 10
+
+/** Set up a machine with BIG_HISTOGRAM_LEN bins of two tokens each. */
+static void
+helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy)
+{
+  const int tokens_per_bin = 2;
+
+  /* Two states in circ_client_machine: start, burst */
+  circpad_machine_states_init(&circ_client_machine, 2);
+
+  circpad_state_t *burst_state =
+    &circ_client_machine.states[CIRCPAD_STATE_BURST];
+
+  circ_client_machine.states[CIRCPAD_STATE_START].
+    next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  burst_state->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  burst_state->next_state[CIRCPAD_EVENT_NONPADDING_RECV] =CIRCPAD_STATE_BURST;
+
+  burst_state->next_state[CIRCPAD_EVENT_NONPADDING_SENT] =CIRCPAD_STATE_CANCEL;
+
+  burst_state->token_removal = CIRCPAD_TOKEN_REMOVAL_HIGHER; /* reset below */
+
+  burst_state->histogram_len = BIG_HISTOGRAM_LEN;
+  burst_state->start_usec = 0;
+  burst_state->range_usec = 1000;
+
+  int n_tokens = 0;
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    burst_state->histogram[i] = tokens_per_bin;
+    n_tokens += tokens_per_bin;
+  }
+
+  burst_state->histogram_total_tokens = n_tokens;
+  burst_state->length_dist.type = CIRCPAD_DIST_UNIFORM;
+  burst_state->length_dist.param1 = n_tokens;
+  burst_state->length_dist.param2 = n_tokens;
+  burst_state->max_length = n_tokens;
+  burst_state->length_includes_nonpadding = 1;
+  burst_state->use_rtt_estimate = 0;
+  burst_state->token_removal = removal_strategy; /* caller-chosen strategy */
+}
+
+static circpad_decision_t
+circpad_machine_schedule_padding_mock(circpad_machine_state_t *mi)
+{
+  (void)mi; /* Mock of circpad_machine_schedule_padding: schedules nothing */
+  return 0;
+}
+
+static uint64_t
+mock_monotime_absolute_usec(void)
+{
+  return 100; /* Mock of monotime_absolute_usec: time is frozen at 100 */
+}
+
+/** Test higher token removal strategy by bin */
+static void
+test_circuitpadding_token_removal_higher(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up. NOTE(review): neither mock is UNMOCKed before returning. */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_HIGHER);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* Move the machine to the burst state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine info whose histogram we inspect below */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the order in which we expect bins to be emptied. Every removal
+   * below is scheduled 57 usec before the mocked current time, i.e. in bin 4
+   * (31 <= 57 < 62). Once bin 4 is empty, the "higher" strategy is expected
+   * to take tokens from the next higher non-empty bin: 5, then 6, etc. */
+  const int bin_removal_order[] = {4, 5, 6, 7, 8};
+  unsigned i;
+
+  /* Remove both tokens from every bin listed in bin_removal_order */
+  for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. The guard mirrors the closest tests;
+       here i never reaches BIG_HISTOGRAM_LEN - 2, so the check always runs. */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all lower bins are not touched */
+  for (i=0; i < 4 ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin (1 usec with start_usec == 100), for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 1;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test lower token removal strategy by bin */
+static void
+test_circuitpadding_token_removal_lower(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up. NOTE(review): neither mock is UNMOCKed before returning. */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_LOWER);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* Move the machine to the burst state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine info whose histogram we inspect below */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the order in which we expect bins to be emptied. Every removal
+   * below is scheduled 57 usec before the mocked current time, i.e. in bin 4
+   * (31 <= 57 < 62). Once bin 4 is empty, the "lower" strategy is expected
+   * to take tokens from the next lower non-empty bin: 3, then 2, etc. */
+  const int bin_removal_order[] = {4, 3, 2, 1, 0};
+  unsigned i;
+
+  /* Remove both tokens from every bin listed in bin_removal_order */
+  for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. The guard mirrors the closest tests;
+       here i never reaches BIG_HISTOGRAM_LEN - 2, so the check always runs. */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all higher bins are untouched */
+  for (i = 5; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test above the highest bin (29202 usec), for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test closest token removal strategy by bin */
+static void
+test_circuitpadding_closest_token_removal(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up. NOTE(review): neither mock is UNMOCKed before returning. */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* Move the machine to the burst state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine info whose histogram we inspect below */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 3, 5, 2, 6, 1, 7, 0, 8, 9};
+
+  /* Remove all tokens from all bins apart from the infinity bin (bin 9) */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all bins have been refilled */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin, for coverage (bin 0 is emptied by hand) */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 102;
+  mi->histogram[0] = 0;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[1], OP_EQ, 1);
+
+  /* Test above the highest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test closest token removal strategy with usec */
+static void
+test_circuitpadding_closest_token_removal_usec(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up. NOTE(review): neither mock is UNMOCKed before returning. */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* Move the machine to the burst state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine info whose histogram we inspect below */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* XXX we want to test remove_token_exact and
+     circpad_machine_remove_closest_token() with usec */
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 3, 5, 2, 1, 0, 6, 7, 8, 9};
+
+  /* Remove all tokens from all bins apart from the infinity bin (bin 9) */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all bins have been refilled */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin, for coverage (bin 0 is emptied by hand) */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 102;
+  mi->histogram[0] = 0;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[1], OP_EQ, 1);
+
+  /* Test above the highest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test exact token removal strategy */
+static void
+test_circuitpadding_token_removal_exact(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up. NOTE(review): neither mock is UNMOCKed before returning. */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_EXACT);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* Move the machine to the burst state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine info whose histogram we inspect below */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /**********************************************************************/
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Ensure that we will clear out bin #4 with this usec */
+  mi->padding_scheduled_at_usec = current_time - 57;
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_token(mi);
+  mi->padding_scheduled_at_usec = current_time - 57;
+  tt_int_op(mi->histogram[4], OP_EQ, 1);
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[4], OP_EQ, 0);
+
+  /* Ensure that we will not remove any other tokens even though we try to,
+   * since this is what the exact strategy dictates */
+  mi->padding_scheduled_at_usec = current_time - 57;
+  circpad_machine_remove_token(mi);
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    if (i != 4) {
+      tt_int_op(mi->histogram[i], OP_EQ, 2);
+    }
+  }
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+#undef BIG_HISTOGRAM_LEN
+
+void
+test_circuitpadding_tokens(void *arg)
+{
+  const circpad_state_t *state;
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /** Test plan:
+   *
+   * 1. Test symmetry between bin_to_usec and usec_to_bin
+   *    a. Test conversion
+   *    b. Test edge transitions (lower, upper)
+   * 2. Test remove higher on an empty bin
+   *    a. Normal bin
+   *    b. Infinity bin
+   *    c. Bin 0
+   *    d. No higher
+   * 3. Test remove lower
+   *    a. Normal bin
+   *    b. Bin 0
+   *    c. No lower
+   * 4. Test remove closest
+   *    a. Closest lower
+   *    b. Closest higher
+   *    c. Closest 0
+   *    d. Closest Infinity
+   */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+
+  helper_create_basic_machine();
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,
+                                                                 0);
+
+  mi = client_side->padding_info[0];
+
+  // Pretend a non-padding cell was sent
+  // XXX: This messes us up.. Padding gets scheduled..
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  /* We have to save the infinity bin because one inf delay
+   * could have been chosen when we transition to burst */
+  circpad_hist_token_t inf_bin = mi->histogram[4];
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  state = circpad_machine_current_state(client_side->padding_info[0]);
+
+  // Test 0: convert bin->usec->bin
+  // Bin 0+1 have different semantics
+  for (int bin = 0; bin < 2; bin++) {
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+  }
+  for (int bin = 2; bin < state->histogram_len-1; bin++) {
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    /* Verify we round down */
+    bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec+3);
+    tt_int_op(bin, OP_EQ, bin2);
+
+    bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec-1);
+    tt_int_op(bin, OP_EQ, bin2+1);
+  }
+
+  // Test 1: converting usec->bin->usec->bin
+  // Bin 0+1 have different semantics.
+  for (circpad_delay_t i = 0; i <= state->start_usec+1; i++) {
+    int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                            i);
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    tt_int_op(i, OP_LE, usec);
+  }
+  for (circpad_delay_t i = state->start_usec+1;
+           i <= state->start_usec + state->range_usec; i++) {
+    int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                            i);
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    tt_int_op(i, OP_GE, usec);
+  }
+
+  /* 2.a. Normal higher bin */
+  {
+    tt_int_op(mi->histogram[2], OP_EQ, 2);
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    tt_int_op(mi->histogram[2], OP_EQ, 1);
+
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[2], OP_EQ, 0);
+
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+  }
+
+  /* 2.b. Higher Infinity bin */
+  {
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+
+    /* Test past the infinity bin */
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 5)+1000000);
+
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+  }
+
+  /* 2.c. Bin 0 */
+  {
+    tt_int_op(mi->histogram[0], OP_EQ, 1);
+    circpad_machine_remove_higher_token(mi,
+         state->start_usec/2);
+    tt_int_op(mi->histogram[0], OP_EQ, 0);
+  }
+
+  /* Drain the infinity bin and cause a refill */
+  while (inf_bin != 0) {
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+    circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+    inf_bin--;
+  }
+
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+
+  // We should have refilled here.
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  /* 3.a. Bin 0 */
+  {
+    tt_int_op(mi->histogram[0], OP_EQ, 1);
+    circpad_machine_remove_higher_token(mi,
+         state->start_usec/2);
+    tt_int_op(mi->histogram[0], OP_EQ, 0);
+  }
+
+  /* 3.b. Test remove lower normal bin */
+  {
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+    tt_int_op(mi->histogram[2], OP_EQ, 2);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    /* 3.c. No lower */
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    tt_int_op(mi->histogram[2], OP_EQ, 0);
+  }
+
+  /* 4. Test remove closest
+   *    a. Closest lower
+   *    b. Closest higher
+   *    c. Closest 0
+   *    d. Closest Infinity
+   */
+  circpad_machine_setup_tokens(mi);
+  tt_int_op(mi->histogram[2], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[2], OP_EQ, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 0);
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[0], OP_EQ, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  /* 5. Test remove closest usec
+   *    a. Closest 0
+   *    b. Closest lower (below midpoint)
+   *    c. Closest higher (above midpoint)
+   *    d. Closest Infinity
+   */
+  circpad_machine_setup_tokens(mi);
+
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  tt_int_op(mi->histogram[0], OP_EQ, 0);
+  tt_int_op(mi->histogram[2], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  tt_int_op(mi->histogram[2], OP_EQ, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  tt_int_op(mi->histogram[3], OP_EQ, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  // XXX: Need more coverage of the actual usec branches
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Unit test: padding and padding-negotiation cells that show up from an
+ * unexpected hop, at the wrong endpoint, or with an unparseable body must be
+ * rejected, and must not disturb the machines that are already installed.
+ * A second pass builds a circuit whose second hop does not support padding
+ * and checks that no machine is installed on either side. */
+void
+test_circuitpadding_wronghop(void *arg)
+{
+  /**
+   * Test plan:
+   * 1. Padding sent from hop 1 and 3 to client
+   * 2. Send negotiated from hop 1 and 3 to client
+   * 3. Garbled negotiated cell
+   * 4. Padding negotiate sent to client
+   * 5. Send negotiate stop command for unknown machine
+   * 6. Send negotiated to relay
+   * 7. Garbled padding negotiate cell
+   */
+  (void)arg;
+  uint32_t read_bw = 0, overhead_bw = 0;
+  cell_t cell;
+  signed_error_t ret;
+  origin_circuit_t *orig_client;
+
+  /* Step 2 hands &cell to the negotiated handler before the test has
+   * written anything into it; start from a fully defined (all-zero) cell so
+   * no code path can ever look at indeterminate stack bytes. */
+  memset(&cell, 0, sizeof(cell));
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  client_side = (circuit_t *)origin_circuit_new();
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  orig_client = TO_ORIGIN_CIRCUIT(client_side);
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  nodes_init();
+
+  /* Run on mocked clocks so the test controls the passage of time. */
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  /* Build three hops */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+  /* verify echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* Snapshot the client's byte counters so each delivery below can be
+   * compared against them. */
+  read_bw = orig_client->n_delivered_read_circ_bw;
+  overhead_bw = orig_client->n_overhead_read_circ_bw;
+
+  /* 1. Test padding from first and third hop: neither counter may move */
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  /* The same cell from the second hop must grow the overhead counter while
+   * leaving the delivered counter untouched. */
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_LT,
+            orig_client->n_overhead_read_circ_bw);
+
+  /* 2. Test padding negotiated not handled from hops 1,3 */
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath);
+  tt_int_op(ret, OP_EQ, -1);
+
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 3. Garbled negotiated cell (every byte 0xff) from the second hop */
+  memset(&cell, 255, sizeof(cell));
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 4. Test that negotiate is dropped at origin */
+  read_bw = orig_client->n_delivered_read_circ_bw;
+  overhead_bw = orig_client->n_overhead_read_circ_bw;
+  relay_send_command_from_edge(0, relay_side,
+                               RELAY_COMMAND_PADDING_NEGOTIATE,
+                               (void*)cell.payload,
+                               (size_t)3, NULL);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* 5. Test that asking to stop the wrong machine does nothing */
+  circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side),
+                            255, 2, CIRCPAD_COMMAND_STOP);
+  tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(client_side->padding_info[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* 6. Sending negotiated command to relay does nothing */
+  ret = circpad_handle_padding_negotiated(relay_side, &cell, NULL);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 7. Test garbled negotiate cell (all-zero cell) at the relay: it must be
+   *    rejected and must not produce a reply to the client */
+  memset(&cell, 0, sizeof(cell));
+  ret = circpad_handle_padding_negotiate(relay_side, &cell);
+  tt_int_op(ret, OP_EQ, -1);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* Test 2: Test no padding */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = (circuit_t *)origin_circuit_new();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  /* The second hop is extended with padding == 0, i.e. marked as not
+   * supporting padding. */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 0);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* verify no echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Spoof padding negotiated on circuit with no padding */
+  circpad_padding_negotiated(relay_side,
+                             CIRCPAD_MACHINE_CIRC_SETUP,
+                             CIRCPAD_COMMAND_START,
+                             CIRCPAD_RESPONSE_OK);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  circpad_padding_negotiated(relay_side,
+                             CIRCPAD_MACHINE_CIRC_SETUP,
+                             CIRCPAD_COMMAND_START,
+                             CIRCPAD_RESPONSE_ERR);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  monotime_disable_test_mocking();
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+}
+
+void
+test_circuitpadding_negotiation(void *arg)
+{
+  /**
+   * Exercise padding-machine negotiation between a fake origin circuit
+   * (client_side) and a fake relay circuit (relay_side), using the mocked
+   * cell-packaging and node-lookup functions installed below.
+   *
+   * Test plan:
+   * 1. Test circuit where padding is supported by middle
+   *    a. Make sure padding negotiation is sent
+   *    b. Test padding negotiation delivery and parsing
+   * 2. Test circuit where padding is unsupported by middle
+   *    a. Make sure padding negotiation is not sent
+   * 3. Test failure to negotiate a machine due to desync.
+   */
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  nodes_init();
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  /* 1. Build two hops, both flagged as supporting padding (last argument
+   *    is nonzero) */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify padding was negotiated: the relay side now has both a machine
+   * and its runtime info in slot 0 */
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+  /* verify echo was sent: one cell counted in each direction */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Test 2: Test no padding. Start over with a fresh pair of circuits; the
+   * cell counters are not reset, so they keep their values from part 1. */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 0); /* no padding */
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* verify no echo was sent (same counter values as after part 1) */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Force negotiate padding: explicitly ask hop 2 to start the circuit
+   * setup machine even though that hop was added without padding support. */
+  circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side),
+                            CIRCPAD_MACHINE_CIRC_SETUP,
+                            2, CIRCPAD_COMMAND_START);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  /* verify no echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* 3. Test failure to negotiate a machine due to desync. The relay's
+   *    machine list is emptied below while the client's list is left
+   *    alone, so the two sides no longer agree on the available machines. */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  SMARTLIST_FOREACH(relay_padding_machines,
+          circpad_machine_spec_t *,
+          m, tor_free(m->states); tor_free(m));
+  smartlist_free(relay_padding_machines);
+  relay_padding_machines = smartlist_new();
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify echo was sent: both counters advanced by one */
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  /* verify no padding was negotiated: neither side keeps a machine or any
+   * runtime info for it */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  monotime_disable_test_mocking();
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+}
+
+/** Test helper: simulate extending the origin circuit <b>client</b> by one
+ * hop, with <b>mid_relay</b> standing in for the relay end of the circuit.
+ *
+ * Fires the non-padding sent/received events on both circuits, advances the
+ * mocked clocks by 10 msec between the outbound and inbound events, appends
+ * a new open hop to <b>client</b>'s cpath, and finally signals that a hop
+ * was added.
+ *
+ * <b>padding</b> is stored in the first byte of the hop's identity digest
+ * (which our mocked node_get_by_id looks at) and is also passed through to
+ * extend_info_new(); nonzero marks the hop as supporting padding. */
+static void
+simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay,
+                           int padding)
+{
+  char whatevs_key[CPATH_KEY_MATERIAL_LEN];
+  char digest[DIGEST_LEN];
+  tor_addr_t addr;
+
+  /* All three scratch objects are handed to code that takes them as input
+   * (extend_info_new() and circuit_init_cpath_crypto()). Give them defined
+   * contents so the helper never passes indeterminate stack bytes around;
+   * only digest[0] carries meaning for the test. */
+  memset(whatevs_key, 0, sizeof(whatevs_key));
+  memset(digest, 0, sizeof(digest));
+  memset(&addr, 0, sizeof(addr));
+
+  // Pretend a non-padding cell was sent
+  circpad_cell_event_nonpadding_sent((circuit_t*)client);
+
+  // Receive extend cell at middle
+  circpad_cell_event_nonpadding_received((circuit_t*)mid_relay);
+
+  // Advance time a tiny bit so we can calculate an RTT
+  curr_mocked_time += 10 * TOR_NSEC_PER_MSEC;
+  monotime_coarse_set_mock_time_nsec(curr_mocked_time);
+  monotime_set_mock_time_nsec(curr_mocked_time);
+
+  // Receive extended cell at middle
+  circpad_cell_event_nonpadding_sent((circuit_t*)mid_relay);
+
+  // Receive extended cell at first hop
+  circpad_cell_event_nonpadding_received((circuit_t*)client);
+
+  // Add a hop to cpath
+  crypt_path_t *hop = tor_malloc_zero(sizeof(crypt_path_t));
+  onion_append_to_cpath(&TO_ORIGIN_CIRCUIT(client)->cpath, hop);
+
+  hop->magic = CRYPT_PATH_MAGIC;
+  hop->state = CPATH_STATE_OPEN;
+
+  // add an extend info to indicate if this node supports padding or not.
+  // (set the first byte of the digest for our mocked node_get_by_id)
+  digest[0] = padding;
+
+  hop->extend_info = extend_info_new(
+          padding ? "padding" : "non-padding",
+          digest, NULL, NULL, NULL,
+          &addr, padding);
+
+  circuit_init_cpath_crypto(hop, whatevs_key, sizeof(whatevs_key), 0, 0);
+
+  hop->package_window = circuit_initial_package_window();
+  hop->deliver_window = CIRCWINDOW_START;
+
+  // Signal that the hop was added
+  circpad_machine_event_circ_added_hop(TO_ORIGIN_CIRCUIT(client));
+}
+
+/** Test helper: allocate and return a new two-state (start, burst) padding
+ * machine spec.
+ *
+ * A padding-sent event moves the machine from start to burst and keeps it
+ * in burst; a length-count event in burst moves it to the end state. The
+ * burst state uses a three-bin histogram with all six tokens in bin 0 and
+ * no token removal.
+ *
+ * The spec is allocated with tor_malloc_zero(); nothing in this helper
+ * frees it, so the caller is responsible for it. */
+static circpad_machine_spec_t *
+helper_create_conditional_machine(void)
+{
+  circpad_machine_spec_t *ret =
+    tor_malloc_zero(sizeof(circpad_machine_spec_t));
+
+  /* Start, burst */
+  circpad_machine_states_init(ret, 2);
+
+  ret->states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  ret->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  ret->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
+
+  ret->states[CIRCPAD_STATE_BURST].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_NONE;
+
+  /* Three bins: every token sits in bin 0, bins 1 and 2 are empty.
+   * histogram_total_tokens must equal the sum of the bins. */
+  ret->states[CIRCPAD_STATE_BURST].histogram_len = 3;
+  ret->states[CIRCPAD_STATE_BURST].start_usec = 0;
+  ret->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  ret->states[CIRCPAD_STATE_BURST].histogram[0] = 6;
+  ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram[2] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 6;
+  ret->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 0;
+  ret->states[CIRCPAD_STATE_BURST].length_includes_nonpadding = 1;
+
+  return ret;
+}
+
+/** Test helper: replace the global origin and relay padding machine lists
+ * with fresh ones holding two machines each (numbers 2 and 3).
+ *
+ * Origin machine 2 applies to circuits that are still building, have no
+ * streams and still have RELAY_EARLY cells, with at least two hops. Origin
+ * machine 3 requires vanguards, at least three hops, and an opened circuit
+ * with streams and no RELAY_EARLY cells left. Both target hop 2, negotiate
+ * their own end, and stop after exactly four cells. The two relay machines
+ * only get their machine numbers set. */
+static void
+helper_create_conditional_machines(void)
+{
+  circpad_machine_spec_t *building_machine = NULL;
+  circpad_machine_spec_t *opened_machine = NULL;
+  circpad_machine_spec_t *relay_machine_2 = NULL;
+  circpad_machine_spec_t *relay_machine_3 = NULL;
+
+  origin_padding_machines = smartlist_new();
+  relay_padding_machines = smartlist_new();
+
+  /* Origin machine 2: used while the circuit is being built. */
+  building_machine = helper_create_conditional_machine();
+  building_machine->machine_num = 2;
+  building_machine->is_origin_side = 1;
+  building_machine->should_negotiate_end = 1;
+  building_machine->target_hopnum = 2;
+
+  building_machine->conditions.requires_vanguards = 0;
+  building_machine->conditions.min_hops = 2;
+  building_machine->conditions.state_mask = CIRCPAD_CIRC_BUILDING|
+           CIRCPAD_CIRC_NO_STREAMS|CIRCPAD_CIRC_HAS_RELAY_EARLY;
+  building_machine->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+
+  /* Uniform length distribution over [4, 4], capped at 4: the burst state
+   * ends after four packets. */
+  building_machine->states[CIRCPAD_STATE_BURST].length_dist.type =
+      CIRCPAD_DIST_UNIFORM;
+  building_machine->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4;
+  building_machine->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4;
+  building_machine->states[CIRCPAD_STATE_BURST].max_length = 4;
+
+  smartlist_add(origin_padding_machines, building_machine);
+
+  /* Origin machine 3: used once the circuit is open and carrying streams. */
+  opened_machine = helper_create_conditional_machine();
+  opened_machine->machine_num = 3;
+  opened_machine->is_origin_side = 1;
+  opened_machine->should_negotiate_end = 1;
+  opened_machine->target_hopnum = 2;
+
+  opened_machine->conditions.requires_vanguards = 1;
+  opened_machine->conditions.min_hops = 3;
+  opened_machine->conditions.state_mask = CIRCPAD_CIRC_OPENED|
+           CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY;
+  opened_machine->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+
+  /* Same four-packet limit as machine 2. */
+  opened_machine->states[CIRCPAD_STATE_BURST].length_dist.type =
+      CIRCPAD_DIST_UNIFORM;
+  opened_machine->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4;
+  opened_machine->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4;
+  opened_machine->states[CIRCPAD_STATE_BURST].max_length = 4;
+
+  smartlist_add(origin_padding_machines, opened_machine);
+
+  /* Relay-side counterparts, identified by machine number only. */
+  relay_machine_2 = helper_create_conditional_machine();
+  relay_machine_2->machine_num = 2;
+  smartlist_add(relay_padding_machines, relay_machine_2);
+
+  relay_machine_3 = helper_create_conditional_machine();
+  relay_machine_3->machine_num = 3;
+  smartlist_add(relay_padding_machines, relay_machine_3);
+}
+
+/** Unit test: padding machines are chosen according to their conditions.
+ * Uses the two origin machines installed by
+ * helper_create_conditional_machines() and checks which one is active as
+ * the circuit's hop count, opened flag, RELAY_EARLY budget, streams and the
+ * HSLayer2Nodes option change, including after a machine has shut itself
+ * down. */
+void
+test_circuitpadding_conditions(void *arg)
+{
+  /**
+   * Test plan:
+   *  0. Make a few origin-side and relay-side machines with diff conditions
+   *     * vanguards, purposes, has_opened circs, no relay early
+   *     * Client side should_negotiate_end
+   *     * Length limits
+   *  1. Test STATE_END transitions
+   *  2. Test new machine after end with same conditions
+   *  3. Test new machine due to changed conditions
+   *     * Esp: built event, no relay early, no streams
+   * XXX: Diff test:
+   *  1. Test STATE_END with pending timers
+   *  2. Test marking a circuit before padding callback fires
+   *  3. Test freeing a circuit before padding callback fires
+   */
+  (void)arg;
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  nodes_init();
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  client_side = (circuit_t *)origin_circuit_new();
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  helper_create_conditional_machines();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  /* Simulate extend. This should result in the original machine getting
+   * added, since the circuit is not built */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Verify that machine #2 is added */
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* Deliver a padding cell to the client, to trigger burst state */
+  circpad_cell_event_padding_sent(client_side);
+
+  /* This should have triggered the length shutdown condition on client.. */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Verify machine is gone from both sides */
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Send another event.. verify machine gets re-added properly
+   * (test race with shutdown) */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* None of the next three condition changes is enough, taken alone, to
+   * replace machine #2. */
+  TO_ORIGIN_CIRCUIT(client_side)->p_streams = 0;
+  circpad_machine_event_circ_has_no_streams(TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* Now make the circuit opened and send built event */
+  TO_ORIGIN_CIRCUIT(client_side)->has_opened = 1;
+  circpad_machine_event_circ_built(TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  TO_ORIGIN_CIRCUIT(client_side)->remaining_relay_early_cells = 0;
+  circpad_machine_event_circ_has_no_relay_early(
+          TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* Both pointers are fake non-NULL sentinels, never dereferenced by this
+   * test; they only need to read as "set". NOTE(review): presumably
+   * HSLayer2Nodes is what the requires_vanguards condition looks at --
+   * confirm against the conditions check. */
+  get_options_mutable()->HSLayer2Nodes = (void*)1;
+  TO_ORIGIN_CIRCUIT(client_side)->p_streams = (void*)1;
+  circpad_machine_event_circ_has_streams(TO_ORIGIN_CIRCUIT(client_side));
+
+  /* Verify different machine is added */
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3);
+
+  /* Hold off on negotiated */
+  deliver_negotiated = 0;
+
+  /* Deliver a padding cell to the client, to trigger burst state */
+  circpad_cell_event_padding_sent(client_side);
+
+  /* This should have triggered the length shutdown condition on client
+   * but not the response for the padding machine */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL);
+
+  /* Verify machine is gone from the relay (but negotiated not back yet) */
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Add another hop and verify it's back */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3);
+
+  tt_ptr_op(client_side->padding_info[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+ done:
+  /* Drop the fake sentinel pointers first so that nothing run during or
+   * after teardown can mistake them for real objects. */
+  get_options_mutable()->HSLayer2Nodes = NULL;
+  TO_ORIGIN_CIRCUIT(client_side)->p_streams = NULL;
+
+  /* Same teardown as the other tests in this file. The machine lists built
+   * by helper_create_conditional_machines() are not released here. */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  monotime_disable_test_mocking();
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+  return;
+}
+
+/** Unit test: run the machines installed by circpad_machines_init() on a
+ * simulated three-hop circuit. Checks that client and relay take turns
+ * sending padding as the mocked timers advance, that application traffic
+ * removes the machines, that a scheduled padding timer is cancelled by
+ * outgoing non-padding traffic, and that no padding is sent after the
+ * circuits have been marked for close or freed. */
+void
+test_circuitpadding_circuitsetup_machine(void *arg)
+{
+  /**
+   * Test case plan:
+   *
+   * 1. Simulate a normal circuit setup pattern
+   *    a. Application traffic
+   *
+   * FIXME: This should focus more on exercising the machine
+   * features rather than actual traffic patterns. For example,
+   * test cancellation and bins empty/refill
+   */
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  dummy_channel.cmux = circuitmux_alloc();
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  nodes_init();
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  /* Test case #1: Build a 3 hop circuit, then wait and let pad */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  tt_int_op(n_client_cells, OP_EQ, 1);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_BURST);
+
+  /* The client has padding scheduled; the relay does not yet. From here
+   * on the two sides alternate: each timers_advance_and_run() call is
+   * expected to bump exactly one of the two cell counters by one, and to
+   * leave padding scheduled on the other side only. */
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->is_padding_timer_scheduled,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+              CIRCPAD_STATE_GAP);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 3);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 3);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 4);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 4);
+  tt_int_op(n_relay_cells, OP_EQ, 4);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 5);
+  tt_int_op(n_relay_cells, OP_EQ, 4);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 5);
+  tt_int_op(n_relay_cells, OP_EQ, 5);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 5);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 6);
+
+  /* After six cells each way the client machine has reached END, the relay
+   * machine sits in GAP, and neither side has padding scheduled. */
+  tt_int_op(client_side->padding_info[0]->current_state,
+            OP_EQ, CIRCPAD_STATE_END);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->current_state,
+            OP_EQ, CIRCPAD_STATE_GAP);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+
+  /* Verify we can't schedule padding in END state */
+  circpad_decision_t ret =
+      circpad_machine_schedule_padding(client_side->padding_info[0]);
+  tt_int_op(ret, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Simulate application traffic */
+  circpad_cell_event_nonpadding_sent(client_side);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN);
+  circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA,
+                                  TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+
+  /* The machines are gone from both sides; only the relay counter moved. */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 7);
+
+  // Test timer cancellation
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  timers_advance_and_run(5000);
+  circpad_cell_event_padding_received(client_side);
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_GAP);
+
+  tt_int_op(n_client_cells, OP_EQ, 8);
+  tt_int_op(n_relay_cells, OP_EQ, 8);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+
+  /* Test timer cancel due to state rules: a non-padding cell sent by the
+   * client clears its scheduled padding, and a received padding cell
+   * schedules it again. */
+  circpad_cell_event_nonpadding_sent(client_side);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  circpad_cell_event_padding_received(client_side);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+
+  /* Simulate application traffic to cancel timer */
+  circpad_cell_event_nonpadding_sent(client_side);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN);
+  circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA,
+                                  TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* No cells sent, except negotiate end from relay */
+  tt_int_op(n_client_cells, OP_EQ, 8);
+  tt_int_op(n_relay_cells, OP_EQ, 9);
+
+  /* Test mark for close and free */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  timers_advance_and_run(5000);
+  circpad_cell_event_padding_received(client_side);
+
+  tt_int_op(n_client_cells, OP_EQ, 10);
+  tt_int_op(n_relay_cells, OP_EQ, 10);
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_GAP);
+
+  /* Both sides have padding pending when the client circuit is marked for
+   * close and the relay circuit is freed; running the timers afterwards
+   * must not send anything. */
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  circuit_mark_for_close(client_side, END_CIRC_REASON_FLAG_REMOTE);
+  free_fake_orcirc(relay_side);
+  timers_advance_and_run(5000);
+
+  /* No cells sent */
+  tt_int_op(n_client_cells, OP_EQ, 10);
+  tt_int_op(n_relay_cells, OP_EQ, 10);
+
+ done:
+  /* relay_side is freed inside the test body on the success path, so only
+   * the client circuit is released here. */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  timers_shutdown();
+  monotime_disable_test_mocking();
+  /* Every MOCK() installed above gets its UNMOCK(), and nodes_init() its
+   * nodes_free(), matching the teardown of the other tests in this file. */
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+
+  return;
+}
+
+/** Helper function: Initializes <b>circ_client_machine</b> as a seven-state
+ *  padding machine in which states 0 through 5 each sample their delay from
+ *  a different probability distribution.  In state order these are:
+ *  uniform, logistic, log-logistic, geometric, Weibull, and Pareto.
+ *
+ *  For each of those six states, <b>min</b> is stored as both the first
+ *  distribution parameter and start_usec, and <b>max</b> as both the second
+ *  distribution parameter and range_usec.  A
+ *  CIRCPAD_EVENT_NONPADDING_RECV event moves state k to state k+1.
+ *
+ *  State 6 gets no configuration here beyond whatever
+ *  circpad_machine_states_init() does.
+ */
+static void
+helper_circpad_circ_distribution_machine_setup(int min, int max)
+{
+  circpad_state_t *st;
+  int k;
+
+  circpad_machine_states_init(&circ_client_machine, 7);
+  st = circ_client_machine.states;
+
+  /* Every sampled state shares the same distribution parameters and the
+   * same delay range. */
+  for (k = 0; k < 6; k++) {
+    st[k].iat_dist.param1 = min;
+    st[k].iat_dist.param2 = max;
+    st[k].start_usec = min;
+    st[k].range_usec = max;
+  }
+
+  /* State 0: uniform */
+  st[0].iat_dist.type = CIRCPAD_DIST_UNIFORM;
+  st[0].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 1;
+
+  /* State 1: logistic */
+  st[1].iat_dist.type = CIRCPAD_DIST_LOGISTIC;
+  st[1].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 2;
+
+  /* State 2: log-logistic */
+  st[2].iat_dist.type = CIRCPAD_DIST_LOG_LOGISTIC;
+  st[2].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 3;
+
+  /* State 3: geometric */
+  st[3].iat_dist.type = CIRCPAD_DIST_GEOMETRIC;
+  st[3].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 4;
+
+  /* State 4: Weibull */
+  st[4].iat_dist.type = CIRCPAD_DIST_WEIBULL;
+  st[4].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 5;
+
+  /* State 5: Pareto */
+  st[5].iat_dist.type = CIRCPAD_DIST_PARETO;
+  st[5].next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 6;
+}
+
+/** Simple test that the padding delays sampled from each state's probability
+ *  distribution actually fall within the configured delay range. */
+/* Each of the six states visited below has its own distribution type; see
+ * helper_circpad_circ_distribution_machine_setup(). */
+static void
+test_circuitpadding_sample_distribution(void *arg)
+{
+  circpad_machine_state_t *mi;
+  int n_samples;
+  int n_states;
+
+  (void) arg;
+
+  /* mock this function so that we don't actually schedule any padding */
+  MOCK(circpad_machine_schedule_padding,
+       circpad_machine_schedule_padding_mock);
+
+  /* Initialize a machine with multiple probability distributions that should
+   * return values between 0 and 10 */
+  circpad_machines_init();
+  helper_circpad_circ_distribution_machine_setup(0, 10);
+
+  /* Initialize machine and circuits */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+  mi = client_side->padding_info[0];
+
+  /* For every state, sample a bunch of values from the distribution and ensure
+   * they fall within range. */
+  for (n_states = 0 ; n_states < 6; n_states++) {
+    /* Make sure we are in the right state */
+    tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, n_states);
+
+    for (n_samples = 0; n_samples < 100; n_samples++) {
+      circpad_delay_t delay = circpad_machine_sample_delay(mi);
+      tt_int_op(delay, OP_GE, 0);
+      tt_int_op(delay, OP_LE, 10);
+    }
+
+    /* send a non-padding cell to move to the next machine state */
+    circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  }
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  UNMOCK(circpad_machine_schedule_padding);
+}
+
+/** Mock of circpad_machine_spec_transition(): never changes machine state. */
+static circpad_decision_t
+circpad_machine_spec_transition_mock(circpad_machine_state_t *mi,
+                                     circpad_event_t event)
+{
+  (void) event;
+  (void) mi;
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/* Test per-machine padding rate limits */
+static void
+test_circuitpadding_machine_rate_limiting(void *arg)
+{
+  (void) arg;
+  bool retval;
+  circpad_machine_state_t *mi;
+  int i;
+
+  /* Ignore machine transitions for the purposes of this function, we only
+   * really care about padding counts */
+  MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock);
+  MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock);
+
+  /* Setup machine and circuits */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  helper_create_basic_machine();
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+  mi = client_side->padding_info[0];
+  /* Set up the machine info so that we can get through the basic functions */
+  mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+
+  /* Configure the per-machine rate limits that this test exercises */
+  circ_client_machine.max_padding_percent = 50;
+  circ_client_machine.allowed_padding_count = 100;
+
+  /* Check padding limit, should be fine since we haven't sent anything yet. */
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Send 99 padding cells which is below the machine's
+   * allowed_padding_count=100, so the rate limit will not trigger */
+  for (i=0;i<99;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Now send another padding cell to reach allowed_padding_count=100,
+     and see that the limit will trigger */
+  circpad_send_padding_cell_for_callback(mi);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  retval = circpad_machine_schedule_padding(mi);
+  tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Cover wrap: brings the total to UINT16_MAX+1 padding cells sent */
+  for (;i<UINT16_MAX;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/2+1);
+
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, mi);
+  for (i=0;i<UINT16_MAX;i++) {
+    circpad_cell_event_nonpadding_sent(client_side);
+  }
+
+  tt_int_op(mi->nonpadding_sent, OP_EQ, UINT16_MAX/2);
+  tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/4+1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+}
+
+/* Test global padding rate limits, as set through consensus parameters */
+static void
+test_circuitpadding_global_rate_limiting(void *arg)
+{
+  (void) arg;
+  bool retval;
+  circpad_machine_state_t *mi;
+  int i;
+
+  /* Ignore machine transitions for the purposes of this function, we only
+   * really care about padding counts */
+  MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock);
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+  timers_initialize();
+
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  dummy_channel.cmux = circuitmux_alloc();
+
+  /* Setup machine and circuits */
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, &dummy_channel);
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  helper_create_basic_machine();
+  relay_side->padding_machine[0] = &circ_client_machine;
+  relay_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(relay_side, 0);
+  mi = relay_side->padding_info[0];
+  /* Set up the machine info so that we can get through the basic functions */
+  mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Now test the global limits by setting up the consensus */
+  networkstatus_t vote1; /* only net_params is initialized below */
+  vote1.net_params = smartlist_new();
+  smartlist_split_string(vote1.net_params,
+         "circpad_global_allowed_cells=100 circpad_global_max_padding_pct=50",
+                         NULL, 0, 0);
+  /* Register global limits with the padding subsystem */
+  circpad_new_consensus_params(&vote1);
+
+  /* Check padding limit, should be fine since we haven't sent anything yet. */
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so
+   * the rate limit will not trigger */
+  for (i=0;i<99;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Now send another padding cell to pass circpad_global_allowed_cells=100,
+     and see that the limit will trigger */
+  circpad_send_padding_cell_for_callback(mi);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  retval = circpad_machine_schedule_padding(mi);
+  tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Now send 92 non-padding cells to get near the
+   * circpad_global_max_padding_pct=50 limit; in particular with 96 non-padding
+   * cells, the padding traffic is still 51% of total traffic so limit should
+   * trigger */
+  for (i=0;i<92;i++) {
+    circpad_cell_event_nonpadding_sent(relay_side);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  /* Send another non-padding cell to bring the padding traffic to 50% of total
+   * traffic and get past the limit */
+  circpad_cell_event_nonpadding_sent(relay_side);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+ done:
+  free_fake_orcirc(relay_side); /* NOTE(review): client_side is not freed */
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  SMARTLIST_FOREACH(vote1.net_params, char *, cp, tor_free(cp));
+  smartlist_free(vote1.net_params);
+}
+
+/** Define the testcase_t entry for test_<b>name</b>, run with <b>flags</b>. */
+#define TEST_CIRCUITPADDING(name, flags) \
+    { #name, test_##name, (flags), NULL, NULL }
+
+struct testcase_t circuitpadding_tests[] = {
+  TEST_CIRCUITPADDING(circuitpadding_tokens, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_negotiation, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_wronghop, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_conditions, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_rtt, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_sample_distribution, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_machine_rate_limiting, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_global_rate_limiting, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_lower, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_higher, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_closest_token_removal, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_closest_token_removal_usec, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_exact, TT_FORK),
+  END_OF_TESTCASES
+};
+
diff --git a/src/test/test_containers.c b/src/test/test_containers.c
index 717eb0892a..ad0edf4aa3 100644
--- a/src/test/test_containers.c
+++ b/src/test/test_containers.c
@@ -96,6 +96,30 @@ test_container_smartlist_basic(void *arg)
   tor_free(v555);
 }
 
+/** Test SMARTLIST_FOREACH_REVERSE_BEGIN loop macro */
+static void
+test_container_smartlist_foreach_reverse(void *arg)
+{
+  smartlist_t *sl = smartlist_new();
+  int i;
+  (void) arg;
+
+  /* Add integers to smartlist in increasing order */
+  for (i=0;i<100;i++) {
+    smartlist_add(sl, (void*)(uintptr_t)i);
+  }
+
+  /* Walk them in reverse and test their value; i counts down from 100 */
+  SMARTLIST_FOREACH_REVERSE_BEGIN(sl, void*, k) {
+    i--;
+    tt_ptr_op(k, OP_EQ, (void*)(uintptr_t)i);
+  } SMARTLIST_FOREACH_END(k);
+  tt_int_op(i, OP_EQ, 0); /* the loop must have visited every element */
+
+ done:
+  smartlist_free(sl);
+}
+
 /** Run unit tests for smartlist-of-strings functionality. */
 static void
 test_container_smartlist_strings(void *arg)
@@ -1281,6 +1305,7 @@ test_container_smartlist_strings_eq(void *arg)
 struct testcase_t container_tests[] = {
   CONTAINER_LEGACY(smartlist_basic),
   CONTAINER_LEGACY(smartlist_strings),
+  CONTAINER_LEGACY(smartlist_foreach_reverse),
   CONTAINER_LEGACY(smartlist_overlap),
   CONTAINER_LEGACY(smartlist_digests),
   CONTAINER_LEGACY(smartlist_join),
diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c
new file mode 100644
index 0000000000..ff23f01033
--- /dev/null
+++ b/src/test/test_prob_distr.c
@@ -0,0 +1,1428 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file test_prob_distr.c
+ * \brief Test probability distributions.
+ * \details
+ *
+ * For each probability distribution we do two kinds of tests:
+ *
+ * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions
+ *    and the various relationships between them for each distribution. We also
+ *    do deterministic tests on their sampling functions. Test vectors for
+ *    these tests were computed from alternative implementations and were
+ *    eyeballed to make sure they make sense
+ *    (e.g. src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr
+ *    with 200-bit precision and is then tested in test_logit_logistic()).
+ *
+ * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from
+ *    the given distributions is distributed properly. The stochastic tests are
+ *    slow and their false positive rate is not well suited for CI, so they are
+ *    currently disabled-by-default and put into 'tests-slow'.
+ */
+
+#define PROB_DISTR_PRIVATE
+
+#include "orconfig.h"
+
+#include "test/test.h"
+
+#include "core/or/or.h"
+
+#include "lib/math/prob_distr.h"
+#include "lib/math/fp.h"
+#include "lib/crypt_ops/crypto_rand.h"
+
+#include <float.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Round <b>d</b> down to a whole number and convert that to size_t.  The
+ * local avoids the -Wbad-function-cast warning that (size_t)floor(d) draws.
+ */
+static size_t
+floor_to_size_t(double d)
+{
+  const double rounded_down = floor(d);
+  return (size_t) rounded_down;
+}
+
+/**
+ * Round <b>d</b> up to a whole number and convert that to size_t.  The
+ * local avoids the -Wbad-function-cast warning that (size_t)ceil(d) draws.
+ */
+static size_t
+ceil_to_size_t(double d)
+{
+  const double rounded_up = ceil(d);
+  return (size_t) rounded_up;
+}
+
+/*
+ * Geometric(p) distribution, supported on {1, 2, 3, ...}.
+ *
+ * Return the log of the probability mass function Geom(n; p): the log
+ * probability that the first success occurs on trial n when each trial
+ * succeeds with probability p.  Returns -HUGE_VAL where the mass is zero.
+ */
+static double
+logpmf_geometric(unsigned n, double p)
+{
+  /* n = 0 is outside the support; without this, n - 1 would wrap around. */
+  if (n == 0)
+    return -HUGE_VAL;
+  /* This is actually a check against 1, but we do >= so that the compiler
+     does not raise a -Wfloat-equal */
+  if (p >= 1)
+    return n == 1 ? 0 : -HUGE_VAL;
+  return (n - 1)*log1p(-p) + log(p);
+}
+
+/**
+ * Compute the logistic function, translated in output by 1/2:
+ * logistichalf(x) = logistic(x) - 1/2.  Well-conditioned on the entire
+ * real line, with maximum condition number 1 at 0.
+ *
+ * This implementation gives relative error bounded by 5 eps.
+ */
+static double
+logistichalf(double x)
+{
+  /*
+   * Rewrite this with the identity
+   *
+   *  1/(1 + e^{-x}) - 1/2
+   *  = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x})
+   *  = (1/2 - e^{-x}/2)/(1 + e^{-x})
+   *  = (1 - e^{-x})/[2 (1 + e^{-x})]
+   *  = -(e^{-x} - 1)/[2 (1 + e^{-x})],
+   *
+   * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))].
+   *
+   * Suppose exp has error d0, + has error d1, expm1 has error
+   * d2, and / has error d3, so we evaluate
+   *
+   *  -(1 + d2) (1 + d3) (e^{-x} - 1)
+   *    / [2 (1 + d1) (1 + (1 + d0) e^{-x})].
+   *
+   * In the denominator,
+   *
+   *  1 + (1 + d0) e^{-x}
+   *  = 1 + e^{-x} + d0 e^{-x}
+   *  = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})),
+   *
+   * so the relative error of the numerator is
+   *
+   *  d' = d2 + d3 + d2 d3,
+   * and of the denominator,
+   *  d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x})
+   *      = d1 + d0 L(-x) + d0 d1 L(-x),
+   *
+   * where L(-x) is logistic(-x).  By Lemma 1 the relative error
+   * of the quotient is bounded by
+   *
+   *  2|d2 + d3 + d2 d3 - d1 - d0 L(-x) - d0 d1 L(-x)|.
+   *
+   * Since 0 < L(-x) < 1, this is bounded by
+   *
+   *  2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1|
+   *  <= 4 eps + 2 eps^2.
+   */
+  if (x < log(DBL_EPSILON/8)) {
+    /*
+     * Avoid overflow in e^{-x}.  When x < log(eps/8), we also have
+     * x < log(eps/4) < logit(eps/4), so that
+     * logistic(x) < eps/4.  Hence the relative error of
+     * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and
+     * so the relative error of -1/2 from logistic(x) - 1/2
+     * is bounded by eps.
+     */
+    return -0.5;
+  } else {
+    return -expm1(-x)/(2*(1 + exp(-x)));
+  }
+}
+
+/**
+ * Return log(e^{A[0]} + e^{A[1]} + ... + e^{A[n-1]}) for the <b>n</b>
+ * elements of <b>A</b>, or log(0) when <b>n</b> is zero.  The largest
+ * element is factored out before exponentiating, and the terms are added
+ * starting from the end of the array; callers should therefore pass the
+ * array sorted in descending order to minimize error, since no temporary
+ * space is used to sort it here.
+ *
+ * Warning: Infinite or NaN inputs are not handled sensibly, because
+ * nothing in this file needs that at the moment.  (NaN, or -inf and +inf
+ * together, should yield NaN; +inf and finite should yield +inf;
+ * otherwise all -inf should be ignored because exp(-inf) = 0.)
+ */
+static double
+logsumexp(double *A, size_t n)
+{
+  double peak;
+  double total = 0;
+  size_t k;
+
+  if (n == 0)
+    return log(0);
+
+  /* Find the largest element. */
+  peak = A[0];
+  for (k = 1; k < n; k++)
+    peak = (A[k] > peak) ? A[k] : peak;
+
+  /* Accumulate from the last element down to the first. */
+  for (k = n; k > 0; k--)
+    total += exp(A[k - 1] - peak);
+  return log(total) + peak;
+}
+
+/**
+ * Compute log(1 - e^x), the log of the complement of a probability that
+ * is itself given in log space.  Defined only for negative x, so that
+ * e^x < 1.
+ */
+static double
+log1mexp(double x)
+{
+  /*
+   * Split at x = log(1/2) = -log(2).  Above the split e^x is closer to
+   * 1 than to 0, so get 1 - e^x from expm1 and take its log; at or
+   * below the split e^x is at most 1/2, so hand -e^x to log1p.
+   */
+  const double split = -log(2);
+
+  return (split < x) ? log(-expm1(x)) : log1p(-exp(x));
+}
+
+/*
+ * Tests of numerical errors in computing logit, logistic, and the
+ * various cdfs, sfs, icdfs, and isfs.
+ */
+
+#define arraycount(A) (sizeof(A)/sizeof((A)[0])) /* A must be an array */
+
+/** Return relative error between <b>actual</b> and <b>expected</b>, that is,
+ *  |(expected - actual)/expected|.
+ *
+ *  Special cases: If <b>expected</b> is zero or infinite, return 0 if
+ *  <b>actual</b> is equal to <b>expected</b> and 1 if not, since the
+ *  usual notion of relative error is undefined but we only use this
+ *  for testing relerr(e, a) <= bound.  In the general case a NaN in
+ *  either argument propagates to the result, and NaN <= bound is false
+ *  no matter what bound is.
+ *
+ *  Beware: if you test !(relerr(e, a) > bound), then the result is
+ *  true when a is NaN because NaN > bound is false too.  See
+ *  CHECK_RELERR for correct use to decide when to report failure.
+ */
+static double
+relerr(double expected, double actual)
+{
+  /*
+   * -Wfloat-equal rules out ==, so equality is spelled with inequalities:
+   * (fabs(expected) <= 0) iff (expected == 0), and
+   * (actual <= expected && actual >= expected) iff actual == expected,
+   * whether expected is zero or infinite.
+   */
+  const bool degenerate = fabs(expected) <= 0 || tor_isinf(expected);
+
+  if (!degenerate)
+    return fabs((expected - actual)/expected);
+
+  return (actual <= expected && actual >= expected) ? 0 : 1;
+}
+
+/** Check that relative error of <b>expected</b> and <b>actual</b> is within
+ *  <b>relerr_bound</b>.  Caller must arrange to have i, ok, and relerr_bound
+ *  in scope.  Each argument is evaluated exactly once.  */
+#define CHECK_RELERR(expected, actual) do {                                   \
+  double check_expected = (expected);                                         \
+  double check_actual = (actual);                                             \
+  const char *str_expected = #expected;                                       \
+  const char *str_actual = #actual;                                           \
+  double check_relerr = relerr(check_expected, check_actual);                 \
+  if (!(check_relerr <= relerr_bound)) {                                      \
+    log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)"         \
+             " = %.17e > %.17e\n",                                            \
+             __func__, __LINE__, (unsigned) i,                                \
+             str_expected, check_expected,                                    \
+             str_actual, check_actual,                                        \
+             check_relerr, relerr_bound);                                     \
+    ok = false;                                                               \
+  }                                                                           \
+} while (0)
+
+/* Check that a <= b; if not (which includes either side being NaN), log a
+ * warning that names both expressions and their values, and set ok to
+ * false.  Each argument is evaluated once.  Caller must arrange to have i
+ * and ok in scope.  */
+#define CHECK_LE(a, b) do {                                                   \
+  const double le_lhs = (a);                                                  \
+  const double le_rhs = (b);                                                  \
+  if (!(le_lhs <= le_rhs)) {                                                  \
+    log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n",             \
+             __func__, __LINE__, (unsigned) i,                                \
+             #a, le_lhs, #b, le_rhs);                                         \
+    ok = false;                                                               \
+  }                                                                           \
+} while (0)
+
+/**
+ * Test the logit and logistic functions.  Confirm that they agree with
+ * the cdf, sf, icdf, and isf of the standard Logistic distribution.
+ * Confirm that the sampler for the standard logistic distribution maps
+ * [0, 1] into the right subinterval for the inverse transform, for
+ * this implementation.
+ */
+static void
+test_logit_logistic(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    double x;                   /* x = logit(p) */
+    double p;                   /* p = logistic(x) */
+    double phalf;               /* p - 1/2 = logistic(x) - 1/2 */
+  } cases[] = {
+    { -HUGE_VAL, 0, -0.5 },
+    { -1000, 0, -0.5 },
+    { -710, 4.47628622567513e-309, -0.5 },
+    { -708, 3.307553003638408e-308, -0.5 },
+    { -2, .11920292202211755, -.3807970779778824 },
+    { -1.0000001, .2689414017088022, -.23105859829119776 },
+    { -1, .2689414213699951, -.23105857863000487 },
+    { -0.9999999, .26894144103118883, -.2310585589688111 },
+    /* see src/test/prob_distr_mpfr_ref.c for computation */
+    { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 },
+    { -4.000000000533334e-5, .49999, -.00001 },
+    { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 },
+    { -4e-9, .499999999, -1e-9 },
+    { -4e-16, .5, -1e-16 },
+    { -4e-300, .5, -1e-300 },
+    { 0, .5, 0 },
+    { 4e-300, .5, 1e-300 },
+    { 4e-16, .5, 1e-16 },
+    { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 },
+    { 4e-9, .500000001, 1e-9 },
+    { 4.0000000005333336e-5, .50001, .00001 },
+    { 8.000042667076272e-3, .502, .002 },
+    { 0.9999999, .7310585589688111, .2310585589688111 },
+    { 1, .7310585786300049, .23105857863000487 },
+    { 1.0000001, .7310585982911977, .23105859829119774 },
+    { 2, .8807970779778823, .3807970779778824 },
+    { 708, 1, .5 },
+    { 710, 1, .5 },
+    { 1000, 1, .5 },
+    { HUGE_VAL, 1, .5 },
+  };
+  double relerr_bound = 3e-15; /* >10eps; read by CHECK_RELERR */
+  size_t i;
+  bool ok = true; /* cleared by CHECK_RELERR/CHECK_LE on failure */
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double phalf = cases[i].phalf;
+
+    /*
+     * cdf is logistic, icdf is logit, and symmetry for
+     * sf/isf.
+     */
+    CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1));
+    CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1));
+    CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1));
+    CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1));
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2);
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2);
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2));
+
+    /*
+     * For p near 0 and p near 1/2, the arithmetic of
+     * translating by 1 loses precision.
+     */
+    if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) {
+      CHECK_RELERR(icdf_logistic(p, 0, 1),
+          (icdf_logistic(p, 1, 2) - 1)/2);
+      CHECK_RELERR(isf_logistic(p, 0, 1),
+          (isf_logistic(p, 1, 2) - 1)/2);
+    }
+
+    CHECK_RELERR(p, logistic(x));
+    CHECK_RELERR(phalf, logistichalf(x));
+
+    /*
+     * On the interior floating-point numbers, either logit or
+     * logithalf had better give the correct answer.
+     *
+     * For probabilities near 0, we can get much finer resolution with
+     * logit, and for probabilities near 1/2, we can get much finer
+     * resolution with logithalf by representing them using p - 1/2.
+     *
+     * E.g., we can write -.00001 for phalf, and .49999 for p, but the
+     * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64
+     * arithmetic.  So test logit(.49999) which should give the same
+     * answer as logithalf(-1.0000000000010001e-5), namely
+     * -4.000000000537333e-5, and also test logithalf(-.00001) which
+     * gives -4.000000000533334e-5 instead -- but don't expect
+     * logit(.49999) to give -4.000000000533334e-5 even though it looks
+     * like 1/2 - .00001.
+     *
+     * A naive implementation of logit will just use log(p/(1 - p)) and
+     * give the answer -4.000000000551673e-05 for .49999, which is
+     * wrong in a lot of digits, which happens because log is
+     * ill-conditioned near 1 and thus amplifies whatever relative
+     * error we made in computing p/(1 - p).
+     */
+    if ((0 < p && p < 1) || tor_isinf(x)) {
+      if (phalf >= p - 0.5 && phalf <= p - 0.5) /* i.e. phalf == p - 0.5 */
+        CHECK_RELERR(x, logit(p));
+      if (p >= 0.5 + phalf && p <= 0.5 + phalf) /* i.e. p == 0.5 + phalf */
+        CHECK_RELERR(x, logithalf(phalf));
+    }
+
+    CHECK_RELERR(-phalf, logistichalf(-x));
+    if (fabs(phalf) < 0.5 || tor_isinf(x))
+      CHECK_RELERR(-x, logithalf(-phalf));
+    if (p < 1 || tor_isinf(x)) {
+      CHECK_RELERR(1 - p, logistic(-x));
+      if (p > .75 || tor_isinf(x))
+        CHECK_RELERR(-x, logit(1 - p));
+    } else {
+      CHECK_LE(logistic(-x), 1e-300);
+    }
+  }
+
+  for (i = 0; i <= 100; i++) { /* sweep p0 over 0, .01, ..., 1 */
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0));
+    CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0));
+    CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))),
+        sample_logistic(0, 1, p0));
+    CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))),
+        sample_logistic(1, 1, p0));
+  }
+
+  if (!ok)
+    printf("fail logit/logistic / logistic cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, and isf of the LogLogistic distribution.
+ */
+static void
+test_log_logistic(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    /* x is a point in the support of the LogLogistic distribution */
+    double x;
+    /* 'p' is the probability that a random variable X for a given LogLogistic
+     * probability distribution will take value less-or-equal to x */
+    double p;
+    /* 'np' is the probability that a random variable X for a given LogLogistic
+     * probability distribution will take value greater-or-equal to x. */
+    double np;
+  } cases[] = {
+    { 0, 0, 1 },
+    { 1e-300, 1e-300, 1 },
+    { 1e-17, 1e-17, 1 },
+    { 1e-15, 1e-15, .999999999999999 },
+    { .1, .09090909090909091, .90909090909090909 },
+    { .25, .2, .8 },
+    { .5, .33333333333333333, .66666666666666667 },
+    { .75, .42857142857142855, .5714285714285714 },
+    { .9999, .49997499874993756, .5000250012500626 },
+    { .99999999, .49999999749999996, .5000000025 },
+    { .999999999999999, .49999999999999994, .5000000000000002 },
+    { 1, .5, .5 },
+  };
+  double relerr_bound = 3e-15;
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_log_logistic(x, 1, 1));
+    CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1));
+    CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2));
+    if (2*sqrt(DBL_MIN) < x) {
+      CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5));
+      CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5));
+      CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5));
+    }
+
+    CHECK_RELERR(np, sf_log_logistic(x, 1, 1));
+    CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1));
+    CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2));
+    if (2*sqrt(DBL_MIN) < x) {
+      CHECK_RELERR(np, sf_log_logistic(x*x, 1, .5));
+      CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5));
+      CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5));
+    }
+
+    CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1));
+    CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1));
+    CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1));
+    CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2));
+    CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2));
+    CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2));
+    if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
+      CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5));
+      CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5));
+      CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5));
+    }
+
+    CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1));
+    CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1));
+    CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1));
+    CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2));
+    CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2));
+    CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2));
+    if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
+      CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5));
+      CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5));
+      CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5));
+    }
+
+    CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
+    CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1));
+    CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1));
+    CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
+    CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2));
+    CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2));
+    CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2));
+    CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5));
+    CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5));
+
+    if (np < .9) {
+      CHECK_RELERR(x, isf_log_logistic(np, 1, 1));
+      CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1));
+      CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1));
+      CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2));
+      CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2));
+      CHECK_RELERR(sqrt(x)*2, isf_log_logistic(np, 2, 2));
+      CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5));
+      CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5));
+      CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5));
+
+      CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1));
+      CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1));
+      CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1));
+      CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2));
+      CHECK_RELERR(1/(2*sqrt(x)),
+          icdf_log_logistic(np, .5, 2));
+      CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2));
+      CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5));
+      CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5));
+      CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5));
+    }
+
+    CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1));
+    CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1));
+    CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1));
+    CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2));
+    CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2));
+    CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2));
+    CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5));
+    CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5));
+    CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5));
+  }
+
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0));
+    CHECK_RELERR((1 - 0.5*p0)/(0.5*p0),
+        sample_log_logistic(1, p0));
+  }
+
+  if (!ok)
+    printf("fail log logistic cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, isf, and sampler of the Weibull distribution.
+ */
+static void
+test_weibull(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    /* x is a point in the support of the Weibull distribution */
+    double x;
+    /* 'p' is the probability that a random variable X for a given Weibull
+     * probability distribution will take value less-or-equal to x */
+    double p;
+    /* 'np' is the probability that a random variable X for a given Weibull
+     * probability distribution will take value greater-or-equal to x. */
+    double np;
+  } cases[] = {
+    { 0, 0, 1 },
+    { 1e-300, 1e-300, 1 },
+    { 1e-17, 1e-17, 1 },
+    { .1, .09516258196404043, .9048374180359595 },
+    { .5, .3934693402873666, .6065306597126334 },
+    { .6931471805599453, .5, .5 },
+    { 1, .6321205588285577, .36787944117144233 },
+    { 10, .9999546000702375, 4.5399929762484854e-5 },
+    { 36, .9999999999999998, 2.319522830243569e-16 },
+    { 37, .9999999999999999, 8.533047625744066e-17 },
+    { 38, 1, 3.1391327920480296e-17 },
+    { 100, 1, 3.720075976020836e-44 },
+    { 708, 1, 3.307553003638408e-308 },
+    { 710, 1, 4.47628622567513e-309 },
+    { 1000, 1, 0 },
+    { HUGE_VAL, 1, 0 },
+  };
+  double relerr_bound = 3e-15;
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_weibull(x, 1, 1));
+    CHECK_RELERR(p, cdf_weibull(x/2, .5, 1));
+    CHECK_RELERR(p, cdf_weibull(x*2, 2, 1));
+    /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits.  */
+    if (x <= 0 ||
+        sqrt(DBL_MIN) <= x) {
+      CHECK_RELERR(p, cdf_weibull(x*x, 1, .5));
+      CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5));
+      CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5));
+    }
+    CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2));
+    CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2));
+    CHECK_RELERR(np, sf_weibull(x, 1, 1));
+    CHECK_RELERR(np, sf_weibull(x/2, .5, 1));
+    CHECK_RELERR(np, sf_weibull(x*2, 2, 1));
+    CHECK_RELERR(np, sf_weibull(x*x, 1, .5));
+    CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5));
+    CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5));
+    if (x >= 10) {
+      /*
+       * exp amplifies the error of sqrt(x)^2
+       * proportionally to exp(x); for large inputs
+       * this is significant.
+       */
+      double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON));
+      relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON;
+      if (relerr_bound < 3e-15)
+        /*
+         * The tests are written only to 16 decimal places anyway even
+         * if your `double' is, say, i387 binary80, for whatever reason.
+         */
+        relerr_bound = 3e-15;
+      CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2));
+      CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2));
+      CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2));
+      /* Restore the default so the relaxed bound does not leak further. */
+      relerr_bound = 3e-15;
+    }
+
+    if (p <= 0.75) {
+      /*
+       * For p near 1, not enough precision near 1 to
+       * recover x.
+       */
+      CHECK_RELERR(x, icdf_weibull(p, 1, 1));
+      CHECK_RELERR(x/2, icdf_weibull(p, .5, 1));
+      CHECK_RELERR(x*2, icdf_weibull(p, 2, 1));
+    }
+    if (p >= 0.25 && !tor_isinf(x) && np > 0) {
+      /*
+       * For p near 0, not enough precision in np
+       * near 1 to recover x.  For 0, isf gives inf,
+       * even if p is precise enough for the icdf to
+       * work.
+       */
+      CHECK_RELERR(x, isf_weibull(np, 1, 1));
+      CHECK_RELERR(x/2, isf_weibull(np, .5, 1));
+      CHECK_RELERR(x*2, isf_weibull(np, 2, 1));
+    }
+  }
+
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2));
+    CHECK_RELERR(3*sqrt(-log1p(-p0/2)),
+        sample_weibull(1, p0, 3, 2));
+  }
+
+  if (!ok)
+    printf("fail Weibull cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, and isf of the generalized Pareto distribution,
+ * and the values returned by sample_genpareto().
+ */
+static void
+test_genpareto(void *arg)
+{
+  (void) arg;
+
+  struct {
+    /* xi is the 'xi' parameter of the generalized Pareto distribution; p and
+     * np are the values expected from the cdf and the sf at x */
+    double xi, x, p, np;
+  } cases[] = {
+    { 0, 0, 0, 1 },
+    { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-300, .1, .09516258196404043, .9048374180359595 },
+    { 1e-300, 1, .6321205588285577, .36787944117144233 },
+    { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 },
+    { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 },
+    { 1e-16, 1e-200, 9.999999999999998e-201, 1 },
+    { 1e-16, 1e-16, 1e-16, 1 },
+    { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-16, .1, .09516258196404043, .9048374180359595 },
+    { 1e-16, 1, .6321205588285577, .36787944117144233 },
+    { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 },
+    { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 },
+    { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 },
+    { 1, 1e-300, 1e-300, 1 },
+    { 1, 1e-16, 1e-16, .9999999999999999 },
+    { 1, .1, .09090909090909091, .9090909090909091 },
+    { 1, 1, .5, .5 },
+    { 1, 10, .9090909090909091, .0909090909090909 },
+    { 1, 100, .9900990099009901, .0099009900990099 },
+    { 1, 1000, .999000999000999, 9.990009990009992e-4 },
+    { 10, 1e-300, 1e-300, 1 },
+    { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 },
+    { 10, .1, .06696700846319258, .9330329915368074 },
+    { 10, 1, .21320655780322778, .7867934421967723 },
+    { 10, 10, .3696701667040189, .6303298332959811 },
+    { 10, 100, .49886285755007337, .5011371424499267 },
+    { 10, 1000, .6018968102992647, .3981031897007353 },
+  };
+  double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 };
+  size_t i, j;
+  double relerr_bound = 3e-15;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double xi = cases[i].xi;
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(p, cdf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi));
+    CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi));
+
+    if (p < .5) {
+      CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi));
+      CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi));
+      CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi));
+    }
+    if (np < .5) {
+      CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi));
+      CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi));
+      CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi));
+    }
+  }
+
+  for (i = 0; i < arraycount(xi_array); i++) {
+    for (j = 0; j <= 100; j++) {
+      double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100);
+
+      /* This is actually a check against 0, but we do <= so that the compiler
+         does not raise a -Wfloat-equal */
+      if (fabs(xi_array[i]) <= 0) {
+        /*
+         * When xi == 0, the generalized Pareto
+         * distribution reduces to an
+         * exponential distribution.
+         */
+        CHECK_RELERR(-log(p0/2),
+            sample_genpareto(0, p0, 0));
+        CHECK_RELERR(-log1p(-p0/2),
+            sample_genpareto(1, p0, 0));
+      } else {
+        CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i],
+            sample_genpareto(0, p0, xi_array[i]));
+        CHECK_RELERR((j == 0 ? DBL_MIN :
+                expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]),
+            sample_genpareto(1, p0, xi_array[i]));
+      }
+
+      CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]),
+          sample_genpareto(0, p0, xi_array[i]));
+      CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]),
+          sample_genpareto(1, p0, xi_array[i]));
+    }
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the deterministic sampler for the uniform distribution on [a, b]: for
+ * sampling indices t and 1 - t, the outcome must lie within [a, b], and
+ * within [-b, -a] for the mirrored interval.  Nothing else is checked.
+ */
+static void
+test_uniform_interval(void *arg)
+{
+  (void) arg;
+  struct {
+    /* Sample from a uniform distribution with parameters 'a' and 'b', using
+     * 't' as the sampling index. */
+    double t, a, b;
+  } cases[] = {
+    { 0, 0, 0 },
+    { 0, 0, 1 },
+    { 0, 1.0000000000000007, 3.999999999999995 },
+    { 0, 4000, 4000 },
+    { 0.42475836677491291, 4000, 4000 },
+    { 0, -DBL_MAX, DBL_MAX },
+    { 0.25, -DBL_MAX, DBL_MAX },
+    { 0.5, -DBL_MAX, DBL_MAX },
+  };
+  size_t i = 0;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double t = cases[i].t;
+    double a = cases[i].a;
+    double b = cases[i].b;
+
+    CHECK_LE(a, sample_uniform_interval(t, a, b));
+    CHECK_LE(sample_uniform_interval(t, a, b), b);
+
+    CHECK_LE(a, sample_uniform_interval(1 - t, a, b));
+    CHECK_LE(sample_uniform_interval(1 - t, a, b), b);
+
+    CHECK_LE(sample_uniform_interval(t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(t, -b, -a));
+
+    CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a));
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/********************** Stochastic tests ****************************/
+
+/*
+ * Psi test, sometimes also called G-test.  The psi test statistic,
+ * suitably scaled, has chi^2 distribution, but the psi test tends to
+ * have better statistical power in practice to detect deviations than
+ * the chi^2 test does.  (The chi^2 test statistic is the first term of
+ * the Taylor expansion of the psi test statistic.)  The psi test is
+ * generic, for any CDF; particular distributions might have higher-
+ * power tests to distinguish them from predictable deviations or bugs.
+ *
+ * We choose the psi critical value so that a single psi test has
+ * probability below alpha = 1% of spuriously failing even if all the
+ * code is correct.  But the false positive rate for a suite of n tests
+ * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n.  For n = 10,
+ * this is about 10%, and for n = 100 it is well over 50%.
+ *
+ * We can drive it down by running each test twice, and accepting it if
+ * it passes at least once; in that case, it is as if we used Binom(2;
+ * 2, alpha) = alpha^2 as the false positive rate for each test, and
+ * for n = 10 tests, it would be 0.1%, and for n = 100 tests, still
+ * only 1%.
+ *
+ * The critical value for a chi^2 distribution with 100 degrees of
+ * freedom and false positive rate alpha = 1% was taken from:
+ *
+ *  NIST/SEMATECH e-Handbook of Statistical Methods, Section
+ *  1.3.6.7.4 `Critical Values of the Chi-Square Distribution',
+ *  <http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
+ *  retrieved 2018-10-28.
+ */
+
+static const size_t NSAMPLES = 100000;
+/* Number of chances we give to the test to succeed. */
+static const unsigned NTRIALS = 2;
+/* Number of times we want the test to pass per NTRIALS. */
+static const unsigned NPASSES_MIN = 1;
+
+#define PSI_DF 100                          /* degrees of freedom */
+static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */
+
+/**
+ * Perform a psi test on an array of sample counts, C, adding up to N
+ * samples, and an array of log expected probabilities, logP,
+ * representing the null hypothesis for the distribution of samples
+ * counted.  Return false if the psi test rejects the null hypothesis,
+ * true if otherwise.
+ */
+static bool
+psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N)
+{
+  double psi = 0;
+  double c = 0;                 /* Kahan compensation */
+  double t, u;
+  size_t i;
+
+  for (i = 0; i < PSI_DF; i++) {
+    /*
+     * Skip empty bins.  c*log(c/(n*p)) = n * f*log(f/p) where
+     * f = c/n is the frequency, and f*log(f/p) ---> 0 as
+     * f ---> 0, so this is a reasonable choice.  Further, any
+     * mass that _fails_ to turn up in this bin will inflate
+     * another bin instead, so we don't really lose anything by
+     * ignoring empty bins even if they have high
+     * probability.
+     */
+    if (C[i] == 0)
+      continue;
+    t = C[i]*(log((double)C[i]/N) - logP[i]) - c;
+    u = psi + t;
+    c = (u - psi) - t;
+    psi = u;
+  }
+  psi *= 2;  /* psi = 2 * sum over bins of C[i]*log(C[i]/(N*P[i])) */
+
+  return psi <= PSI_CRITICAL;
+}
+
+/**
+ * Psi-test the geometric sampler with parameter <b>p</b>.  Sampled values
+ * 1..PSI_DF-1 get one bin each; everything from PSI_DF up shares the last
+ * bin.  Return true if NPASSES_MIN of up to NTRIALS psi tests pass.
+ */
+static bool
+test_stochastic_geometric_impl(double p)
+{
+  const struct geometric geometric = {
+    .base = GEOMETRIC(geometric),
+    .p = p,
+  };
+  double logP[PSI_DF] = {0};
+  unsigned npass = 0;
+  unsigned ntry, i;
+  size_t j;
+
+  /* logP[i] = log Geom(i + 1; p) for every bin but the last.  */
+  for (i = 0; i < PSI_DF - 1; i++)
+    logP[i] = logpmf_geometric(i + 1, p);
+
+  /* The last bin takes the rest: log (1 - (P[0] + ... + P[n-2])).  */
+  logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1));
+
+  for (ntry = NTRIALS; ntry > 0; ntry--) {
+    size_t C[PSI_DF] = {0};
+
+    for (j = 0; j < NSAMPLES; j++) {
+      double n_tmp = dist_sample(&geometric.base);
+
+      /* Must be an integer.  (XXX -Wfloat-equal)  */
+      tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp);
+
+      /* Must be a positive integer.  */
+      tor_assert(n_tmp >= 1);
+
+      /* Probability of getting a value in the billions is negligible.  */
+      tor_assert(n_tmp <= (double)UINT_MAX);
+
+      unsigned n = (unsigned) n_tmp;
+
+      /* Clamp into the last bin, then count (bins are 0-based).  */
+      C[(n > PSI_DF ? PSI_DF : n) - 1]++;
+    }
+
+    if (psi_test(C, logP, NSAMPLES) && ++npass >= NPASSES_MIN)
+      break;
+  }
+
+  if (npass < NPASSES_MIN) {
+    printf("fail %s sampler\n", "geometric");
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Divide the support of <b>dist</b> into <b>n</b> histogram bins in
+ * <b>logP</b>: one below <b>lo</b>, one from <b>hi</b> upward, and n - 2
+ * uniformly spaced bins of width (hi - lo)/(n - 2) in between.
+ *
+ * In each bin logP[i] we insert the expected log-probability that a sampled
+ * value will fall into that bin. We will use this as the null hypothesis of
+ * the psi test.
+ *
+ * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_{n-1} =
+ * +inf, and x_i = lo + i*(hi - lo)/(n - 2) otherwise.
+ */
+static void
+bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n)
+{
+#define CDF(x)  dist_cdf(dist, x)
+#define SF(x)   dist_sf(dist, x)
+  const double w = (hi - lo)/(n - 2);
+  double halfway = dist_icdf(dist, 0.5);
+  double x_0, x_1;
+  size_t i;
+  size_t n2 = ceil_to_size_t((halfway - lo)/w);
+
+  tor_assert(lo <= halfway);
+  tor_assert(halfway <= hi);
+  tor_assert(n2 <= n);
+
+  x_1 = lo;
+  logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */
+  for (i = 1; i < n2; i++) {
+    x_0 = x_1;
+    /* do the linear interpolation */
+    x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[i] = log(CDF(x_1) - CDF(x_0));
+  }
+  x_0 = hi;
+  logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */
+
+  /* In this loop we are filling out the high part of the array. We are using
+   * SF because in these cases the CDF is near 1 where precision is lower. So
+   * instead we are using SF near 0 where the precision is higher. We have
+   * SF(t) = 1 - CDF(t).  */
+  for (i = 1; i < n - n2; i++) {
+    x_1 = x_0;
+    /* do the linear interpolation */
+    x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[n - i - 1] = log(SF(x_0) - SF(x_1));
+  }
+#undef SF
+#undef CDF
+}
+
+/**
+ * Draw NSAMPLES samples from dist, counting in C[i] the samples x with
+ * x_{i-1} <= x < x_i, where x_-1 = -inf, x_{n-1} = +inf, and otherwise
+ * x_i = lo + i*(hi - lo)/(n - 2).
+ */
+static void
+bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n)
+{
+  const double width = (hi - lo)/(n - 2);
+  size_t k;
+
+  for (k = 0; k < NSAMPLES; k++) {
+    const double x = dist_sample(dist);
+    /* Whatever is neither below lo nor below hi lands in the top bin. */
+    size_t bin = n - 1;
+
+    if (x < lo)
+      bin = 0;
+    else if (x < hi)
+      bin = 1 + floor_to_size_t((x - lo)/width);
+
+    tor_assert(bin < n);
+    C[bin]++;
+  }
+}
+
+/**
+ * Carry out a Psi test on the sampler of <b>dist</b>.
+ *
+ * Histogram NSAMPLES samples of dist over PSI_DF bins whose inner boundaries
+ * lo and hi are the 1/(PSI_DF + 2) quantiles of either tail, and repeat up to
+ * NTRIALS times.  Return true once NPASSES_MIN psi tests have passed, false
+ * (after printing the distribution name) otherwise.  With the current
+ * constants the false positive rate should be bounded by 0.01^2 = 0.0001.
+ */
+static bool
+test_psi_dist_sample(const struct dist *dist)
+{
+  const double tail = 1/(double)(PSI_DF + 2);
+  const double lo = dist_icdf(dist, tail);
+  const double hi = dist_isf(dist, tail);
+  double logP[PSI_DF] = {0};
+  unsigned npass = 0;
+  unsigned ntry;
+
+  /* The null hypothesis: expected log-probability of each bin. */
+  bin_cdfs(dist, lo, hi, logP, PSI_DF);
+
+  for (ntry = NTRIALS; ntry > 0; ntry--) {
+    size_t C[PSI_DF] = {0};
+
+    bin_samples(dist, lo, hi, C, PSI_DF);
+    if (psi_test(C, logP, NSAMPLES) && ++npass >= NPASSES_MIN)
+      break;
+  }
+
+  /* Name the distribution whose sampler failed, if any. */
+  if (npass < NPASSES_MIN) {
+    printf("fail %s sampler\n", dist_name(dist));
+    return false;
+  }
+
+  return true;
+}
+
+/* Counter behind the deterministic randomness; its first value is the seed */
+static uint32_t deterministic_rand_counter;
+
+/** Seed the deterministic randomness with a fresh crypto_rand_u32() value. */
+static void
+init_deterministic_rand(void)
+{
+  deterministic_rand_counter = crypto_rand_u32();
+}
+
+/** Fill <b>out</b> with <b>n</b> bytes squeezed from a XOF that absorbed the
+ *  current value of deterministic_rand_counter, then increment the counter.
+ *
+ *  This function produces deterministic data over multiple calls iff it's
+ *  called in the same call order with the same 'n' parameter (which is the
+ *  case for the psi test). If not, outputs will deviate. */
+static void
+crypto_rand_deterministic(char *out, size_t n)
+{
+  /* Use a XOF to squeeze bytes out of that silly counter */
+  crypto_xof_t *xof = crypto_xof_new();
+  tor_assert(xof);
+  crypto_xof_add_bytes(xof, (uint8_t*)&deterministic_rand_counter,
+                       sizeof(deterministic_rand_counter));
+  crypto_xof_squeeze_bytes(xof, (uint8_t*)out, n);
+  crypto_xof_free(xof);
+
+  /* Increase counter for next run */
+  deterministic_rand_counter++;
+}
+
+/** Psi-test the uniform sampler on several intervals, including ones that
+ * cross zero and ones whose endpoints are subnormal. */
+static void
+test_stochastic_uniform(void *arg)
+{
+  (void) arg;
+
+  const struct uniform uniform01 = {
+    .base = UNIFORM(uniform01),
+    .a = 0, .b = 1,
+  };
+  const struct uniform uniform_pos = {
+    .base = UNIFORM(uniform_pos),
+    .a = 1.23, .b = 4.56,
+  };
+  const struct uniform uniform_neg = {
+    .base = UNIFORM(uniform_neg),
+    .a = -10, .b = -1,
+  };
+  const struct uniform uniform_cross = {
+    .base = UNIFORM(uniform_cross),
+    .a = -1.23, .b = 4.56,
+  };
+  const struct uniform uniform_subnormal = {
+    .base = UNIFORM(uniform_subnormal),
+    .a = 4e-324, .b = 4e-310,
+  };
+  const struct uniform uniform_subnormal_cross = {
+    .base = UNIFORM(uniform_subnormal_cross),
+    .a = -4e-324, .b = 4e-310,
+  };
+  bool ok = true;
+
+  init_deterministic_rand();
+  /* Save the seed now: every mocked crypto_rand() call bumps the counter. */
+  const uint32_t seed = deterministic_rand_counter;
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok &= test_psi_dist_sample(&uniform01.base);
+  ok &= test_psi_dist_sample(&uniform_pos.base);
+  ok &= test_psi_dist_sample(&uniform_neg.base);
+  ok &= test_psi_dist_sample(&uniform_cross.base);
+  ok &= test_psi_dist_sample(&uniform_subnormal.base);
+  ok &= test_psi_dist_sample(&uniform_subnormal_cross.base);
+
+  tt_assert(ok);
+
+ done:
+  if (!ok)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+/** Psi-test the sampler of a logistic distribution with <b>mu</b>, <b>sigma</b>. */
+static bool
+test_stochastic_logistic_impl(double mu, double sigma)
+{
+  const struct logistic dist = {
+    .base = LOGISTIC(dist),
+    .mu = mu, .sigma = sigma,
+  };
+
+  /* XXX Consider some fancier logistic test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+/** Psi-test the log-logistic sampler with <b>alpha</b> and <b>beta</b>. */
+static bool
+test_stochastic_log_logistic_impl(double alpha, double beta)
+{
+  const struct log_logistic dist = {
+    .base = LOG_LOGISTIC(dist),
+    .alpha = alpha, .beta = beta,
+  };
+
+  /* XXX Consider some fancier log logistic test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+/** Psi-test the Weibull sampler with <b>lambda</b> and <b>k</b>. */
+static bool
+test_stochastic_weibull_impl(double lambda, double k)
+{
+  const struct weibull dist = {
+    .base = WEIBULL(dist),
+    .lambda = lambda, .k = k,
+  };
+
+/*
+ * XXX Consider applying a Tiku-Singh test:
+ *
+ *    M.L. Tiku and M. Singh, `Testing the two-parameter
+ *    Weibull distribution', Communications in Statistics --
+ *    Theory and Methods A10(9), 1981, 907--918.
+ *https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true
+ */
+  return test_psi_dist_sample(&dist.base);
+}
+
+/** Psi-test the generalized Pareto sampler with parameters <b>mu</b>,
+ * <b>sigma</b> and <b>xi</b>; return what test_psi_dist_sample() returns. */
+static bool
+test_stochastic_genpareto_impl(double mu, double sigma, double xi)
+{
+  const struct genpareto dist = {
+    .base = GENPARETO(dist),
+    .mu = mu, .sigma = sigma, .xi = xi,
+  };
+
+  /* XXX Consider some fancier GPD test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+/** Psi-test the generalized Pareto sampler for several (mu, sigma, xi). */
+static void
+test_stochastic_genpareto(void *arg)
+{
+  bool ok = false, tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  const uint32_t seed = deterministic_rand_counter; /* the counter moves on */
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_genpareto_impl(0, 1, -0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, -1e-30);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 0);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 1e-30);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(-1, 1, -0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(1, 2, 0.25);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+/** Psi-test the geometric sampler for several values of p; on failure,
+ * print the seed the deterministic randomness started from. */
+static void
+test_stochastic_geometric(void *arg)
+{
+  bool ok = false, tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  const uint32_t seed = deterministic_rand_counter; /* the counter moves on */
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_geometric_impl(0.1);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(0.5);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(0.9);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(1);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+/** Psi-test the logistic sampler for several (mu, sigma) pairs. */
+static void
+test_stochastic_logistic(void *arg)
+{
+  bool ok = false, tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  const uint32_t seed = deterministic_rand_counter; /* the counter moves on */
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_logistic_impl(0, 1);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(0, 1e-16);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(1, 10);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(-10, 100);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+/** Psi-test the log-logistic sampler for several (alpha, beta) pairs. */
+static void
+test_stochastic_log_logistic(void *arg)
+{
+  bool ok = false, tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  const uint32_t seed = deterministic_rand_counter; /* the counter moves on */
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_log_logistic_impl(1, 1);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(1, 10);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(M_E, 1e-1);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+/** Psi-test the Weibull sampler for several (lambda, k) pairs. */
+static void
+test_stochastic_weibull(void *arg)
+{
+  bool ok = false, tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  const uint32_t seed = deterministic_rand_counter; /* the counter moves on */
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_weibull_impl(1, 0.5);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 1);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 1.5);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 2);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(10, 1);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed)
+    printf("seed: %"PRIu32, seed);
+  UNMOCK(crypto_rand);
+}
+
+struct testcase_t prob_distr_tests[] = {
+  { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL },
+  { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL },
+  { "weibull", test_weibull, TT_FORK, NULL, NULL },
+  { "genpareto", test_genpareto, TT_FORK, NULL, NULL },
+  { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL },
+  END_OF_TESTCASES
+};
+
+struct testcase_t slow_stochastic_prob_distr_tests[] = {
+  { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL },
+  { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL },
+  { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL },
+  { "stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL },
+  { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL,
+    NULL },
+  { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL },
+  END_OF_TESTCASES
+};
diff --git a/src/test/test_slow.c b/src/test/test_slow.c
index 97c2912af6..39a203c726 100644
--- a/src/test/test_slow.c
+++ b/src/test/test_slow.c
@@ -21,6 +21,7 @@
 struct testgroup_t testgroups[] = {
   { "slow/crypto/", slow_crypto_tests },
   { "slow/process/", slow_process_tests },
+  { "slow/prob_distr/", slow_stochastic_prob_distr_tests },
   END_OF_GROUPS
 };
 
diff --git a/src/test/test_util.c b/src/test/test_util.c
index 5ffe2d6b1a..2fd9d24498 100644
--- a/src/test/test_util.c
+++ b/src/test/test_util.c
@@ -19,6 +19,7 @@
 #include "feature/client/transports.h"
 #include "lib/crypt_ops/crypto_format.h"
 #include "lib/crypt_ops/crypto_rand.h"
+#include "lib/defs/time.h"
 #include "test/test.h"
 #include "lib/memarea/memarea.h"
 #include "lib/process/waitpid.h"
@@ -69,6 +70,28 @@
 #define INFINITY_DBL ((double)INFINITY)
 #define NAN_DBL ((double)NAN)
 
+/** Check that tor_isinf() accepts +infinity and rejects NaN and finite values */
+static void
+test_tor_isinf(void *arg)
+{
+  (void) arg;
+
+  tt_assert(tor_isinf(INFINITY_DBL));
+
+  tt_assert(!tor_isinf(NAN_DBL));
+  tt_assert(!tor_isinf(DBL_EPSILON));
+  tt_assert(!tor_isinf(DBL_MAX));
+  tt_assert(!tor_isinf(DBL_MIN));
+
+  tt_assert(!tor_isinf(0.0));
+  tt_assert(!tor_isinf(0.1));
+  tt_assert(!tor_isinf(3));
+  tt_assert(!tor_isinf(3.14));
+
+ done:
+  ;
+}
 /* XXXX this is a minimal wrapper to make the unit tests compile with the
  * changed tor_timegm interface. */
 static time_t
@@ -404,7 +427,6 @@ test_util_time(void *arg)
 
 /* Assume tv_usec is an unsigned integer until proven otherwise */
 #define TV_USEC_MAX UINT_MAX
-#define TOR_USEC_PER_SEC 1000000
 
   /* Overflows in the result type */
 
@@ -6182,6 +6204,7 @@ struct testcase_t util_tests[] = {
   UTIL_TEST(mathlog, 0),
   UTIL_TEST(fraction, 0),
   UTIL_TEST(weak_random, 0),
+  { "tor_isinf", test_tor_isinf, TT_FORK, NULL, NULL },
   { "socket_ipv4", test_util_socket, TT_FORK, &passthrough_setup,
     (void*)"4" },
   { "socket_ipv6", test_util_socket, TT_FORK,
diff --git a/src/trunnel/circpad_negotiation.c b/src/trunnel/circpad_negotiation.c
new file mode 100644
index 0000000000..236be06ada
--- /dev/null
+++ b/src/trunnel/circpad_negotiation.c
@@ -0,0 +1,549 @@
+/* circpad_negotiation.c -- generated by Trunnel v1.5.2.
+ * https://gitweb.torproject.org/trunnel.git
+ * You probably shouldn't edit this file.
+ */
+#include <stdlib.h>
+#include "trunnel-impl.h"
+
+#include "circpad_negotiation.h"
+
+#define TRUNNEL_SET_ERROR_CODE(obj) \
+  do {                              \
+    (obj)->trunnel_error_code_ = 1; \
+  } while (0)
+
+#if defined(__COVERITY__) || defined(__clang_analyzer__)
+/* If we're running a static analysis tool, we don't want it to complain
+ * that some of our remaining-bytes checks are dead-code. */
+int circpadnegotiation_deadcode_dummy__ = 0;
+#define OR_DEADCODE_DUMMY || circpadnegotiation_deadcode_dummy__
+#else
+#define OR_DEADCODE_DUMMY
+#endif
+
+#define CHECK_REMAINING(nbytes, label)                           \
+  do {                                                           \
+    if (remaining < (nbytes) OR_DEADCODE_DUMMY) {                \
+      goto label;                                                \
+    }                                                            \
+  } while (0)
+
+circpad_negotiate_t *
+circpad_negotiate_new(void)
+{
+  circpad_negotiate_t *val = trunnel_calloc(1, sizeof(circpad_negotiate_t));
+  if (NULL == val)
+    return NULL;
+  val->command = CIRCPAD_COMMAND_START;
+  return val;
+}
+
+/** Release all storage held inside 'obj', but do not free 'obj'.
+ */
+static void
+circpad_negotiate_clear(circpad_negotiate_t *obj)
+{
+  (void) obj;
+}
+
+void
+circpad_negotiate_free(circpad_negotiate_t *obj)
+{
+  if (obj == NULL)
+    return;
+  circpad_negotiate_clear(obj);
+  trunnel_memwipe(obj, sizeof(circpad_negotiate_t));
+  trunnel_free_(obj);
+}
+
+uint8_t
+circpad_negotiate_get_version(const circpad_negotiate_t *inp)
+{
+  return inp->version;
+}
+int
+circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == 0))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->version = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_command(const circpad_negotiate_t *inp)
+{
+  return inp->command;
+}
+int
+circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->command = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp)
+{
+  return inp->machine_type;
+}
+int
+circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val)
+{
+  inp->machine_type = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp)
+{
+  return inp->echo_request;
+}
+int
+circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == 0 || val == 1))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->echo_request = val;
+  return 0;
+}
+const char *
+circpad_negotiate_check(const circpad_negotiate_t *obj)
+{
+  if (obj == NULL)
+    return "Object was NULL";
+  if (obj->trunnel_error_code_)
+    return "A set function failed on this object";
+  if (! (obj->version == 0))
+    return "Integer out of bounds";
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    return "Integer out of bounds";
+  if (! (obj->echo_request == 0 || obj->echo_request == 1))
+    return "Integer out of bounds";
+  return NULL;
+}
+
+ssize_t
+circpad_negotiate_encoded_len(const circpad_negotiate_t *obj)
+{
+  ssize_t result = 0;
+
+  if (NULL != circpad_negotiate_check(obj))
+     return -1;
+
+
+  /* Length of u8 version IN [0] */
+  result += 1;
+
+  /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  result += 1;
+
+  /* Length of u8 machine_type */
+  result += 1;
+
+  /* Length of u8 echo_request IN [0, 1] */
+  result += 1;
+  return result;
+}
+int
+circpad_negotiate_clear_errors(circpad_negotiate_t *obj)
+{
+  int r = obj->trunnel_error_code_;
+  obj->trunnel_error_code_ = 0;
+  return r;
+}
+ssize_t
+circpad_negotiate_encode(uint8_t *output, const size_t avail, const circpad_negotiate_t *obj)
+{
+  ssize_t result = 0;
+  size_t written = 0;
+  uint8_t *ptr = output;
+  const char *msg;
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  const ssize_t encoded_len = circpad_negotiate_encoded_len(obj);
+#endif
+
+  if (NULL != (msg = circpad_negotiate_check(obj)))
+    goto check_failed;
+
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  trunnel_assert(encoded_len >= 0);
+#endif
+
+  /* Encode u8 version IN [0] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->version));
+  written += 1; ptr += 1;
+
+  /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->command));
+  written += 1; ptr += 1;
+
+  /* Encode u8 machine_type */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->machine_type));
+  written += 1; ptr += 1;
+
+  /* Encode u8 echo_request IN [0, 1] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->echo_request));
+  written += 1; ptr += 1;
+
+
+  trunnel_assert(ptr == output + written);
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  {
+    trunnel_assert(encoded_len >= 0);
+    trunnel_assert((size_t)encoded_len == written);
+  }
+
+#endif
+
+  return written;
+
+ truncated:
+  result = -2;
+  goto fail;
+ check_failed:
+  (void)msg;
+  result = -1;
+  goto fail;
+ fail:
+  trunnel_assert(result < 0);
+  return result;
+}
+
+/** As circpad_negotiate_parse(), but do not allocate the output
+ * object.
+ */
+static ssize_t
+circpad_negotiate_parse_into(circpad_negotiate_t *obj, const uint8_t *input, const size_t len_in)
+{
+  const uint8_t *ptr = input;
+  size_t remaining = len_in;
+  ssize_t result = 0;
+  (void)result;
+
+  /* Parse u8 version IN [0] */
+  CHECK_REMAINING(1, truncated);
+  obj->version = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->version == 0))
+    goto fail;
+
+  /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  CHECK_REMAINING(1, truncated);
+  obj->command = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    goto fail;
+
+  /* Parse u8 machine_type */
+  CHECK_REMAINING(1, truncated);
+  obj->machine_type = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+
+  /* Parse u8 echo_request IN [0, 1] */
+  CHECK_REMAINING(1, truncated);
+  obj->echo_request = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->echo_request == 0 || obj->echo_request == 1))
+    goto fail;
+  trunnel_assert(ptr + remaining == input + len_in);
+  return len_in - remaining;
+
+ truncated:
+  return -2;
+ fail:
+  result = -1;
+  return result;
+}
+
+ssize_t
+circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in)
+{
+  ssize_t result;
+  *output = circpad_negotiate_new();
+  if (NULL == *output)
+    return -1;
+  result = circpad_negotiate_parse_into(*output, input, len_in);
+  if (result < 0) {
+    circpad_negotiate_free(*output);
+    *output = NULL;
+  }
+  return result;
+}
+circpad_negotiated_t *
+circpad_negotiated_new(void)
+{
+  circpad_negotiated_t *val = trunnel_calloc(1, sizeof(circpad_negotiated_t));
+  if (NULL == val)
+    return NULL;
+  val->command = CIRCPAD_COMMAND_START;
+  val->response = CIRCPAD_RESPONSE_ERR;
+  return val;
+}
+
+/** Release all storage held inside 'obj', but do not free 'obj'.
+ */
+static void
+circpad_negotiated_clear(circpad_negotiated_t *obj)
+{
+  (void) obj;
+}
+
+void
+circpad_negotiated_free(circpad_negotiated_t *obj)
+{
+  if (obj == NULL)
+    return;
+  circpad_negotiated_clear(obj);
+  trunnel_memwipe(obj, sizeof(circpad_negotiated_t));
+  trunnel_free_(obj);
+}
+
+uint8_t
+circpad_negotiated_get_version(const circpad_negotiated_t *inp)
+{
+  return inp->version;
+}
+int
+circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == 0))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->version = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_command(const circpad_negotiated_t *inp)
+{
+  return inp->command;
+}
+int
+circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->command = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_response(const circpad_negotiated_t *inp)
+{
+  return inp->response;
+}
+int
+circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_RESPONSE_ERR || val == CIRCPAD_RESPONSE_OK))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->response = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp)
+{
+  return inp->machine_type;
+}
+int
+circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val)
+{
+  inp->machine_type = val;
+  return 0;
+}
+const char *
+circpad_negotiated_check(const circpad_negotiated_t *obj)
+{
+  if (obj == NULL)
+    return "Object was NULL";
+  if (obj->trunnel_error_code_)
+    return "A set function failed on this object";
+  if (! (obj->version == 0))
+    return "Integer out of bounds";
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    return "Integer out of bounds";
+  if (! (obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK))
+    return "Integer out of bounds";
+  return NULL;
+}
+
+ssize_t
+circpad_negotiated_encoded_len(const circpad_negotiated_t *obj)
+{
+  ssize_t result = 0;
+
+  if (NULL != circpad_negotiated_check(obj))
+     return -1;
+
+
+  /* Length of u8 version IN [0] */
+  result += 1;
+
+  /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  result += 1;
+
+  /* Length of u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  result += 1;
+
+  /* Length of u8 machine_type */
+  result += 1;
+  return result;
+}
+int
+circpad_negotiated_clear_errors(circpad_negotiated_t *obj)
+{
+  int r = obj->trunnel_error_code_;
+  obj->trunnel_error_code_ = 0;
+  return r;
+}
+ssize_t
+circpad_negotiated_encode(uint8_t *output, const size_t avail, const circpad_negotiated_t *obj)
+{
+  ssize_t result = 0;
+  size_t written = 0;
+  uint8_t *ptr = output;
+  const char *msg;
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  const ssize_t encoded_len = circpad_negotiated_encoded_len(obj);
+#endif
+
+  if (NULL != (msg = circpad_negotiated_check(obj)))
+    goto check_failed;
+
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  trunnel_assert(encoded_len >= 0);
+#endif
+
+  /* Encode u8 version IN [0] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->version));
+  written += 1; ptr += 1;
+
+  /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->command));
+  written += 1; ptr += 1;
+
+  /* Encode u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->response));
+  written += 1; ptr += 1;
+
+  /* Encode u8 machine_type */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->machine_type));
+  written += 1; ptr += 1;
+
+
+  trunnel_assert(ptr == output + written);
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  {
+    trunnel_assert(encoded_len >= 0);
+    trunnel_assert((size_t)encoded_len == written);
+  }
+
+#endif
+
+  return written;
+
+ truncated:
+  result = -2;
+  goto fail;
+ check_failed:
+  (void)msg;
+  result = -1;
+  goto fail;
+ fail:
+  trunnel_assert(result < 0);
+  return result;
+}
+
+/** As circpad_negotiated_parse(), but do not allocate the output
+ * object.
+ */
+static ssize_t
+circpad_negotiated_parse_into(circpad_negotiated_t *obj, const uint8_t *input, const size_t len_in)
+{
+  const uint8_t *ptr = input;
+  size_t remaining = len_in;
+  ssize_t result = 0;
+  (void)result;
+
+  /* Parse u8 version IN [0] */
+  CHECK_REMAINING(1, truncated);
+  obj->version = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->version == 0))
+    goto fail;
+
+  /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  CHECK_REMAINING(1, truncated);
+  obj->command = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    goto fail;
+
+  /* Parse u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  CHECK_REMAINING(1, truncated);
+  obj->response = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK))
+    goto fail;
+
+  /* Parse u8 machine_type */
+  CHECK_REMAINING(1, truncated);
+  obj->machine_type = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  trunnel_assert(ptr + remaining == input + len_in);
+  return len_in - remaining;
+
+ truncated:
+  return -2;
+ fail:
+  result = -1;
+  return result;
+}
+
+ssize_t
+circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in)
+{
+  ssize_t result;
+  *output = circpad_negotiated_new();
+  if (NULL == *output)
+    return -1;
+  result = circpad_negotiated_parse_into(*output, input, len_in);
+  if (result < 0) {
+    circpad_negotiated_free(*output);
+    *output = NULL;
+  }
+  return result;
+}
diff --git a/src/trunnel/circpad_negotiation.h b/src/trunnel/circpad_negotiation.h
new file mode 100644
index 0000000000..d09080dc16
--- /dev/null
+++ b/src/trunnel/circpad_negotiation.h
@@ -0,0 +1,195 @@
+/* circpad_negotiation.h -- generated by Trunnel v1.5.2.
+ * https://gitweb.torproject.org/trunnel.git
+ * You probably shouldn't edit this file.
+ */
+#ifndef TRUNNEL_CIRCPAD_NEGOTIATION_H
+#define TRUNNEL_CIRCPAD_NEGOTIATION_H
+
+#include <stdint.h>
+#include "trunnel.h"
+
+#define CIRCPAD_COMMAND_STOP 1
+#define CIRCPAD_COMMAND_START 2
+#define CIRCPAD_RESPONSE_OK 1
+#define CIRCPAD_RESPONSE_ERR 2
+#define CIRCPAD_MACHINE_CIRC_SETUP 1
+/**
+ * This command tells the relay to start or stop a circuit padding
+ * machine: 'command' selects the action and 'machine_type' selects
+ * the machine. */
+#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATE)
+struct circpad_negotiate_st {
+  uint8_t version;
+  uint8_t command;
+  /** Machine type is left unbounded because we can specify
+     * new machines in the consensus */
+  uint8_t machine_type;
+  /** If true, send a relay_drop reply. */
+  uint8_t echo_request;
+  uint8_t trunnel_error_code_;
+};
+#endif
+typedef struct circpad_negotiate_st circpad_negotiate_t;
+/**
+ * This reply reports the outcome of a padding negotiation command:
+ * 'response' is CIRCPAD_RESPONSE_OK or CIRCPAD_RESPONSE_ERR for the
+ * 'command' and 'machine_type' named here. */
+#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATED)
+struct circpad_negotiated_st {
+  uint8_t version;
+  uint8_t command;
+  uint8_t response;
+  /** Machine type is left unbounded because we can specify
+     * new machines in the consensus */
+  uint8_t machine_type;
+  uint8_t trunnel_error_code_;
+};
+#endif
+typedef struct circpad_negotiated_st circpad_negotiated_t;
+/** Return a newly allocated circpad_negotiate, zeroed except that
+ * 'command' starts as CIRCPAD_COMMAND_START. Return NULL if allocation fails.
+ */
+circpad_negotiate_t *circpad_negotiate_new(void);
+/** Release all storage held by the circpad_negotiate in 'victim'. (Do
+ * nothing if 'victim' is NULL.)
+ */
+void circpad_negotiate_free(circpad_negotiate_t *victim);
+/** Try to parse a circpad_negotiate from the buffer in 'input', using
+ * up to 'len_in' bytes from the input buffer. On success, return the
+ * number of bytes consumed and set *output to the newly allocated
+ * circpad_negotiate_t. On failure, return -2 if the input appears
+ * truncated, and -1 if the input is otherwise invalid.
+ */
+ssize_t circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in);
+/** Return the number of bytes we expect to need to encode the
+ * circpad_negotiate in 'obj'. On failure, return a negative value.
+ * Note that this value may be an overestimate, and can even be an
+ * underestimate for certain unencodeable objects.
+ */
+ssize_t circpad_negotiate_encoded_len(const circpad_negotiate_t *obj);
+/** Try to encode the circpad_negotiate from 'input' into the buffer
+ * at 'output', using up to 'avail' bytes of the output buffer. On
+ * success, return the number of bytes used. On failure, return -2 if
+ * the buffer was not long enough, and -1 if the input was invalid.
+ */
+ssize_t circpad_negotiate_encode(uint8_t *output, size_t avail, const circpad_negotiate_t *input);
+/** Check whether the internal state of the circpad_negotiate in 'obj'
+ * is consistent. Return NULL if it is, and a short message if it is
+ * not.
+ */
+const char *circpad_negotiate_check(const circpad_negotiate_t *obj);
+/** Clear any errors that were set on the object 'obj' by its setter
+ * functions. Return true iff errors were cleared.
+ */
+int circpad_negotiate_clear_errors(circpad_negotiate_t *obj);
+/** Return the value of the version field of the circpad_negotiate_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiate_get_version(const circpad_negotiate_t *inp);
+/** Set the value of the version field of the circpad_negotiate_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the command field of the circpad_negotiate_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiate_get_command(const circpad_negotiate_t *inp);
+/** Set the value of the command field of the circpad_negotiate_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the machine_type field of the
+ * circpad_negotiate_t in 'inp'
+ */
+uint8_t circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp);
+/** Set the value of the machine_type field of the circpad_negotiate_t
+ * in 'inp' to 'val'. Every uint8_t value is accepted, so this always
+ * returns 0.
+ */
+int circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the echo_request field of the
+ * circpad_negotiate_t in 'inp'
+ */
+uint8_t circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp);
+/** Set the value of the echo_request field of the circpad_negotiate_t
+ * in 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val);
+/** Return a newly allocated circpad_negotiated (NULL on failure), zeroed
+ * except 'command' = CIRCPAD_COMMAND_START, 'response' = CIRCPAD_RESPONSE_ERR.
+ */
+circpad_negotiated_t *circpad_negotiated_new(void);
+/** Release all storage held by the circpad_negotiated in 'victim'.
+ * (Do nothing if 'victim' is NULL.)
+ */
+void circpad_negotiated_free(circpad_negotiated_t *victim);
+/** Try to parse a circpad_negotiated from the buffer in 'input',
+ * using up to 'len_in' bytes from the input buffer. On success,
+ * return the number of bytes consumed and set *output to the newly
+ * allocated circpad_negotiated_t. On failure, return -2 if the input
+ * appears truncated, and -1 if the input is otherwise invalid.
+ */
+ssize_t circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in);
+/** Return the number of bytes we expect to need to encode the
+ * circpad_negotiated in 'obj'. On failure, return a negative value.
+ * Note that this value may be an overestimate, and can even be an
+ * underestimate for certain unencodeable objects.
+ */
+ssize_t circpad_negotiated_encoded_len(const circpad_negotiated_t *obj);
+/** Try to encode the circpad_negotiated from 'input' into the buffer
+ * at 'output', using up to 'avail' bytes of the output buffer. On
+ * success, return the number of bytes used. On failure, return -2 if
+ * the buffer was not long enough, and -1 if the input was invalid.
+ */
+ssize_t circpad_negotiated_encode(uint8_t *output, size_t avail, const circpad_negotiated_t *input);
+/** Check whether the internal state of the circpad_negotiated in
+ * 'obj' is consistent. Return NULL if it is, and a short message if
+ * it is not.
+ */
+const char *circpad_negotiated_check(const circpad_negotiated_t *obj);
+/** Clear any errors that were set on the object 'obj' by its setter
+ * functions. Return true iff errors were cleared.
+ */
+int circpad_negotiated_clear_errors(circpad_negotiated_t *obj);
+/** Return the value of the version field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_version(const circpad_negotiated_t *inp);
+/** Set the value of the version field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the command field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_command(const circpad_negotiated_t *inp);
+/** Set the value of the command field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the response field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_response(const circpad_negotiated_t *inp);
+/** Set the value of the response field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the machine_type field of the
+ * circpad_negotiated_t in 'inp'
+ */
+uint8_t circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp);
+/** Set the value of the machine_type field of the
+ * circpad_negotiated_t in 'inp' to 'val'. Every uint8_t value is
+ * accepted, so this always returns 0.
+ */
+int circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val);
+
+
+#endif
diff --git a/src/trunnel/circpad_negotiation.trunnel b/src/trunnel/circpad_negotiation.trunnel
new file mode 100644
index 0000000000..abbc929cc5
--- /dev/null
+++ b/src/trunnel/circpad_negotiation.trunnel
@@ -0,0 +1,44 @@
+/* These are the padding negotiation commands */
+const CIRCPAD_COMMAND_STOP = 1;
+const CIRCPAD_COMMAND_START = 2;
+
+/* Responses to commands */
+const CIRCPAD_RESPONSE_OK = 1;
+const CIRCPAD_RESPONSE_ERR = 2;
+
+/* Built-in machine types */
+
+/* 1) Machine that obscures circuit setup */
+const CIRCPAD_MACHINE_CIRC_SETUP = 1;
+
+/**
+ * This command tells the relay to start or stop a circuit padding
+ * machine: 'command' selects the action and 'machine_type' selects
+ * the machine. */
+struct circpad_negotiate {
+  u8 version IN [0];
+  u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP];
+
+  /** Machine type is left unbounded because we can specify
+   * new machines in the consensus */
+  u8 machine_type;
+
+  /** If true, send a relay_drop reply. */
+  // FIXME-MP-AP: Maybe we just say to transition to the first state
+  // here instead.. Also what about delay before responding?
+  u8 echo_request IN [0,1];
+};
+
+/**
+ * This reply reports the outcome of a padding negotiation command:
+ * 'response' is CIRCPAD_RESPONSE_OK or CIRCPAD_RESPONSE_ERR for the
+ * 'command' and 'machine_type' named here. */
+struct circpad_negotiated {
+  u8 version IN [0];
+  u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP];
+  u8 response IN [CIRCPAD_RESPONSE_OK, CIRCPAD_RESPONSE_ERR];
+
+  /** Machine type is left unbounded because we can specify
+   * new machines in the consensus */
+  u8 machine_type;
+};
diff --git a/src/trunnel/include.am b/src/trunnel/include.am
index b5db0609a8..4f4f1d3624 100644
--- a/src/trunnel/include.am
+++ b/src/trunnel/include.am
@@ -11,7 +11,8 @@ TRUNNELINPUTS = \
 	src/trunnel/link_handshake.trunnel \
 	src/trunnel/pwbox.trunnel \
 	src/trunnel/channelpadding_negotiation.trunnel \
-	src/trunner/socks5.trunnel
+	src/trunnel/socks5.trunnel \
+	src/trunnel/circpad_negotiation.trunnel
 
 TRUNNELSOURCES = \
 	src/ext/trunnel/trunnel.c \
@@ -23,8 +24,9 @@ TRUNNELSOURCES = \
 	src/trunnel/hs/cell_introduce1.c \
 	src/trunnel/hs/cell_rendezvous.c \
 	src/trunnel/channelpadding_negotiation.c \
-	src/trunnel/socks5.c                    \
-	src/trunnel/netinfo.c
+	src/trunnel/socks5.c \
+	src/trunnel/netinfo.c \
+	src/trunnel/circpad_negotiation.c
 
 TRUNNELHEADERS = \
 	src/ext/trunnel/trunnel.h		\
@@ -39,7 +41,8 @@ TRUNNELHEADERS = \
 	src/trunnel/hs/cell_rendezvous.h \
 	src/trunnel/channelpadding_negotiation.h \
 	src/trunnel/socks5.h                    \
-	src/trunnel/netinfo.h
+	src/trunnel/netinfo.h \
+	src/trunnel/circpad_negotiation.h
 
 src_trunnel_libor_trunnel_a_SOURCES = $(TRUNNELSOURCES)
 src_trunnel_libor_trunnel_a_CPPFLAGS = \
author	Nick Mathewson <nickm@torproject.org>	2019-01-14 14:48:00 -0500
committer	Nick Mathewson <nickm@torproject.org>	2019-01-14 14:48:00 -0500
commit	b169c8c14f23394b40305f38ee4ce08add278e27 (patch)
tree	0649da16a97792103773f9d5cedbfd75deac49bd
parent	691dec5d4615dec9a845d0f7dea7ef55cc66fe62 (diff)
parent	b269ab5aaeee65a3a0b1e5e0923d9dc7898c232e (diff)
download	tor-b169c8c14f23394b40305f38ee4ce08add278e27.tar.gz tor-b169c8c14f23394b40305f38ee4ce08add278e27.zip