diff options
author | Nick Mathewson <nickm@torproject.org> | 2019-01-14 14:48:00 -0500 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2019-01-14 14:48:00 -0500 |
commit | b169c8c14f23394b40305f38ee4ce08add278e27 (patch) | |
tree | 0649da16a97792103773f9d5cedbfd75deac49bd | |
parent | 691dec5d4615dec9a845d0f7dea7ef55cc66fe62 (diff) | |
parent | b269ab5aaeee65a3a0b1e5e0923d9dc7898c232e (diff) | |
download | tor-b169c8c14f23394b40305f38ee4ce08add278e27.tar.gz tor-b169c8c14f23394b40305f38ee4ce08add278e27.zip |
Merge remote-tracking branch 'asn-github/adaptive_padding-final'
53 files changed, 10102 insertions, 40 deletions
diff --git a/doc/tor.1.txt b/doc/tor.1.txt index b058bebcb3..92355dfb54 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -1021,6 +1021,26 @@ The following options are useful only for clients (that is, if The .exit address notation, if enabled via MapAddress, overrides this option. +[[MiddleNodes]] **MiddleNodes** __node__,__node__,__...__:: + A list of identity fingerprints and country codes of nodes + to use for "middle" hops in your normal circuits. + Normal circuits include all circuits except for direct connections + to directory servers. Middle hops are all hops other than exit and entry. + ++ + This is an **experimental** feature that is meant to be used by researchers + and developers to test new features in the Tor network safely. Using it + without care will strongly influence your anonymity. This feature might get + removed in the future. ++ + The HSLayer2Nodes and HSLayer3Nodes options override this option for onion + service circuits, if they are set. The vanguards addon will read this + option, and if set, it will set HSLayer2Nodes and HSLayer3Nodes to nodes + from this set. ++ + The ExcludeNodes option overrides this option: any node listed in both + MiddleNodes and ExcludeNodes is treated as excluded. See + the **ExcludeNodes** option for more information on how to specify nodes. + [[EntryNodes]] **EntryNodes** __node__,__node__,__...__:: A list of identity fingerprints and country codes of nodes to use for the first hop in your normal circuits. @@ -1037,13 +1057,14 @@ The following options are useful only for clients (that is, if If StrictNodes is set to 1, Tor will treat solely the ExcludeNodes option as a requirement to follow for all the circuits you generate, even if doing so will break functionality for you (StrictNodes applies to neither - ExcludeExitNodes nor to ExitNodes). If StrictNodes is set to 0, Tor will - still try to avoid nodes in the ExcludeNodes list, but it will err on the - side of avoiding unexpected errors. 
Specifically, StrictNodes 0 tells Tor - that it is okay to use an excluded node when it is *necessary* to perform - relay reachability self-tests, connect to a hidden service, provide a - hidden service to a client, fulfill a .exit request, upload directory - information, or download directory information. (Default: 0) + ExcludeExitNodes nor to ExitNodes, nor to MiddleNodes). If StrictNodes + is set to 0, Tor will still try to avoid nodes in the ExcludeNodes list, + but it will err on the side of avoiding unexpected errors. + Specifically, StrictNodes 0 tells Tor that it is okay to use an excluded + node when it is *necessary* to perform relay reachability self-tests, + connect to a hidden service, provide a hidden service to a client, + fulfill a .exit request, upload directory information, or download + directory information. (Default: 0) [[FascistFirewall]] **FascistFirewall** **0**|**1**:: If 1, Tor will only create outgoing connections to ORs running on ports diff --git a/src/app/config/config.c b/src/app/config/config.c index 22070c346b..ecf4c21545 100644 --- a/src/app/config/config.c +++ b/src/app/config/config.c @@ -421,6 +421,10 @@ static config_var_t option_vars_[] = { V(ExcludeExitNodes, ROUTERSET, NULL), OBSOLETE("ExcludeSingleHopRelays"), V(ExitNodes, ROUTERSET, NULL), + /* Researchers need a way to tell their clients to use specific + * middles that they also control, to allow safe live-network + * experimentation with new padding machines. 
*/ + V(MiddleNodes, ROUTERSET, NULL), V(ExitPolicy, LINELIST, NULL), V(ExitPolicyRejectPrivate, BOOL, "1"), V(ExitPolicyRejectLocalInterfaces, BOOL, "0"), @@ -1693,6 +1697,7 @@ options_need_geoip_info(const or_options_t *options, const char **reason_out) int routerset_usage = routerset_needs_geoip(options->EntryNodes) || routerset_needs_geoip(options->ExitNodes) || + routerset_needs_geoip(options->MiddleNodes) || routerset_needs_geoip(options->ExcludeExitNodes) || routerset_needs_geoip(options->ExcludeNodes) || routerset_needs_geoip(options->HSLayer2Nodes) || @@ -2132,6 +2137,7 @@ options_act(const or_options_t *old_options) options->HSLayer2Nodes) || !routerset_equal(old_options->HSLayer3Nodes, options->HSLayer3Nodes) || + !routerset_equal(old_options->MiddleNodes, options->MiddleNodes) || options->StrictNodes != old_options->StrictNodes) { log_info(LD_CIRC, "Changed to using entry guards or bridges, or changed " diff --git a/src/app/config/or_options_st.h b/src/app/config/or_options_st.h index c2bc1079a5..63a17c9771 100644 --- a/src/app/config/or_options_st.h +++ b/src/app/config/or_options_st.h @@ -72,6 +72,9 @@ struct or_options_t { routerset_t *ExitNodes; /**< Structure containing nicknames, digests, * country codes and IP address patterns of ORs to * consider as exits. */ + routerset_t *MiddleNodes; /**< Structure containing nicknames, digests, + * country codes and IP address patterns of ORs to + * consider as middles. */ routerset_t *EntryNodes;/**< Structure containing nicknames, digests, * country codes and IP address patterns of ORs to * consider as entry points. 
*/ diff --git a/src/app/main/main.c b/src/app/main/main.c index d71e43ec30..ba2dfebd77 100644 --- a/src/app/main/main.c +++ b/src/app/main/main.c @@ -22,6 +22,7 @@ #include "core/mainloop/netstatus.h" #include "core/or/channel.h" #include "core/or/channelpadding.h" +#include "core/or/circuitpadding.h" #include "core/or/channeltls.h" #include "core/or/circuitlist.h" #include "core/or/circuitmux_ewma.h" @@ -645,9 +646,13 @@ tor_init(int argc, char *argv[]) /* The options are now initialised */ const or_options_t *options = get_options(); - /* Initialize channelpadding parameters to defaults until we get - * a consensus */ + /* Initialize channelpadding and circpad parameters to defaults + * until we get a consensus */ channelpadding_new_consensus_params(NULL); + circpad_new_consensus_params(NULL); + + /* Initialize circuit padding to defaults+torrc until we get a consensus */ + circpad_machines_init(); /* Initialize predicted ports list after loading options */ predicted_ports_init(); @@ -766,6 +771,7 @@ tor_free_all(int postfork) dns_free_all(); clear_pending_onions(); circuit_free_all(); + circpad_machines_free(); entry_guards_free_all(); pt_free_all(); channel_tls_free_all(); diff --git a/src/core/include.am b/src/core/include.am index 5e69cb9ada..ae47c75e09 100644 --- a/src/core/include.am +++ b/src/core/include.am @@ -32,6 +32,7 @@ LIBTOR_APP_A_SOURCES = \ src/core/or/circuitlist.c \ src/core/or/circuitmux.c \ src/core/or/circuitmux_ewma.c \ + src/core/or/circuitpadding.c \ src/core/or/circuitstats.c \ src/core/or/circuituse.c \ src/core/or/command.c \ @@ -227,6 +228,7 @@ noinst_HEADERS += \ src/core/or/circuitmux.h \ src/core/or/circuitmux_ewma.h \ src/core/or/circuitstats.h \ + src/core/or/circuitpadding.h \ src/core/or/circuituse.h \ src/core/or/command.h \ src/core/or/connection_edge.h \ diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 2e33b37b01..29bcaa098f 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -12,6 
+12,11 @@ #include "core/or/cell_queue_st.h" struct hs_token_t; +struct circpad_machine_spec_t; +struct circpad_machine_state_t; + +/** Number of padding state machines on a circuit. */ +#define CIRCPAD_MAX_MACHINES (2) /** "magic" value for an origin_circuit_t */ #define ORIGIN_CIRCUIT_MAGIC 0x35315243u @@ -177,6 +182,27 @@ struct circuit_t { /** Hashtable node: used to look up the circuit by its HS token using the HS circuitmap. */ HT_ENTRY(circuit_t) hs_circuitmap_node; + + /** Adaptive Padding state machines: these are immutable. The state machines + * that come from the consensus are saved to a global structure, to avoid + * per-circuit allocations. This merely points to the global copy in + * origin_padding_machines or relay_padding_machines that should never + * change or get deallocated. + * + * Each element of this array corresponds to a different padding machine, + * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ + const struct circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES]; + + /** Adaptive Padding machine info for above machines. This is the + * per-circuit mutable information, such as the current state and + * histogram token counts. Some of it is optional (aka NULL). + * If a machine is being shut down, these indexes can be NULL + * without the corresponding padding_machine being NULL, while we + * wait for the other end to respond to our shutdown request. + * + * Each element of this array corresponds to a different padding machine, + * and we can have up to CIRCPAD_MAX_MACHINES such machines. 
*/ + struct circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES]; }; #endif diff --git a/src/core/or/circuitbuild.c b/src/core/or/circuitbuild.c index b89ec09a99..22e4cf96d8 100644 --- a/src/core/or/circuitbuild.c +++ b/src/core/or/circuitbuild.c @@ -43,6 +43,7 @@ #include "core/or/circuitlist.h" #include "core/or/circuitstats.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/command.h" #include "core/or/connection_edge.h" #include "core/or/connection_or.h" @@ -950,12 +951,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath); circuit_build_times_handle_completed_hop(circ); + circpad_machine_event_circ_added_hop(circ); + if (hop) { /* Case two: we're on a hop after the first. */ return circuit_send_intermediate_onion_skin(circ, hop); } /* Case three: the circuit is finished. Do housekeeping tasks on it. */ + circpad_machine_event_circ_built(circ); return circuit_build_no_more_hops(circ); } @@ -2606,7 +2610,24 @@ choose_good_middle_server(uint8_t purpose, return choice; } - choice = router_choose_random_node(excluded, options->ExcludeNodes, flags); + if (options->MiddleNodes) { + smartlist_t *sl = smartlist_new(); + routerset_get_all_nodes(sl, options->MiddleNodes, + options->ExcludeNodes, 1); + + smartlist_subtract(sl, excluded); + + choice = node_sl_choose_by_bandwidth(sl, WEIGHT_FOR_MID); + smartlist_free(sl); + if (choice) { + log_fn(LOG_INFO, LD_CIRC, "Chose fixed middle node: %s", + hex_str(choice->identity, DIGEST_LEN)); + } else { + log_fn(LOG_NOTICE, LD_CIRC, "Restricted middle not available"); + } + } else { + choice = router_choose_random_node(excluded, options->ExcludeNodes, flags); + } smartlist_free(excluded); return choice; } diff --git a/src/core/or/circuitlist.c b/src/core/or/circuitlist.c index c4b5f7ee3e..71f8becddc 100644 --- a/src/core/or/circuitlist.c +++ b/src/core/or/circuitlist.c @@ -62,6 +62,7 @@ #include "core/or/circuitlist.h" 
#include "core/or/circuituse.h" #include "core/or/circuitstats.h" +#include "core/or/circuitpadding.h" #include "core/mainloop/connection.h" #include "app/config/config.h" #include "core/or/connection_edge.h" @@ -1231,6 +1232,9 @@ circuit_free_(circuit_t *circ) CIRCUIT_IS_ORIGIN(circ) ? TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0); + /* Free any circuit padding structures */ + circpad_circuit_free_all_machineinfos(circ); + if (should_free) { memwipe(mem, 0xAA, memlen); /* poison memory */ tor_free(mem); diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c new file mode 100644 index 0000000000..0dadc52139 --- /dev/null +++ b/src/core/or/circuitpadding.c @@ -0,0 +1,2562 @@ +/* Copyright (c) 2017 The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file circuitpadding.c + * \brief Circuit-level padding implementation + * + * \details + * + * This file implements Tor proposal 254 "Padding Negotiation" which is heavily + * inspired by the paper "Toward an Efficient Website Fingerprinting Defense" + * by M. Juarez, M. Imani, M. Perry, C. Diaz, M. Wright. + * + * In particular the code in this file describes mechanisms for clients to + * negotiate various types of circuit-level padding from relays. + * + * Each padding type is described by a state machine (circpad_machine_spec_t), + * which is also referred as a "padding machine" in this file. Currently, + * these state machines are hardcoded in the source code (e.g. see + * circpad_circ_client_machine_init()), but in the future we will be able to + * serialize them in the torrc or the consensus. + * + * As specified by prop#254, clients can negotiate padding with relays by using + * PADDING_NEGOTIATE cells. After successful padding negotiation, padding + * machines are assigned to the circuit in their mutable form as a + * circpad_machine_state_t. + * + * Each state of a padding state machine can be either: + * - A histogram that specifies inter-arrival padding delays. 
+ * - Or a parametrized probability distribution that specifies inter-arrival + * delays (see circpad_distribution_type_t). + * + * Padding machines start from the START state and finish with the END + * state. They can transition between states using the events in + * circpad_event_t. + * + * When a padding machine reaches the END state, it gets wiped from the circuit + * so that other padding machines can take over if needed (see + * circpad_machine_spec_transitioned_to_end()). + **/ + +#define CIRCUITPADDING_PRIVATE + +#include <math.h> +#include "lib/math/fp.h" +#include "lib/math/prob_distr.h" +#include "core/or/or.h" +#include "core/or/circuitpadding.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "core/or/relay.h" +#include "feature/stats/rephist.h" +#include "feature/nodelist/networkstatus.h" + +#include "core/or/channel.h" + +#include "lib/time/compat_time.h" +#include "lib/defs/time.h" +#include "lib/crypt_ops/crypto_rand.h" + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" +#include "core/or/origin_circuit_st.h" +#include "feature/nodelist/routerstatus_st.h" +#include "feature/nodelist/node_st.h" +#include "core/or/cell_st.h" +#include "core/or/extend_info_st.h" +#include "core/crypto/relay_crypto.h" +#include "feature/nodelist/nodelist.h" + +#include "app/config/config.h" + +static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t + circ_purpose); +static inline circpad_circuit_state_t circpad_circuit_state( + origin_circuit_t *circ); +static void circpad_setup_machine_on_circ(circuit_t *on_circ, + const circpad_machine_spec_t *machine); +static double circpad_distribution_sample(circpad_distribution_t dist); + +/** Cached consensus params */ +static uint8_t circpad_global_max_padding_percent; +static uint16_t circpad_global_allowed_cells; + +/** Global cell counts, for rate limiting */ +static uint64_t circpad_global_padding_sent; +static uint64_t circpad_global_nonpadding_sent; + 
+/** This is the list of circpad_machine_spec_t's parsed from consensus and + * torrc that have origin_side == 1 (ie: are for client side). + * + * The machines in this smartlist are considered immutable and they are used + * as-is by circuits so they should not change or get deallocated in Tor's + * runtime and as long as circuits are alive. */ +STATIC smartlist_t *origin_padding_machines = NULL; + +/** This is the list of circpad_machine_spec_t's parsed from consensus and + * torrc that have origin_side == 0 (ie: are for relay side). + * + * The machines in this smartlist are considered immutable and they are used + * as-is by circuits so they should not change or get deallocated in Tor's + * runtime and as long as circuits are alive. */ +STATIC smartlist_t *relay_padding_machines = NULL; + +/** Loop over the current padding state machines using <b>loop_var</b> as the + * loop variable. */ +#define FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var) \ + STMT_BEGIN \ + for (int loop_var = 0; loop_var < CIRCPAD_MAX_MACHINES; loop_var++) { +#define FOR_EACH_CIRCUIT_MACHINE_END } STMT_END ; + +/** Loop over the current active padding state machines using <b>loop_var</b> + * as the loop variable. If a machine is not active, skip it. */ +#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(loop_var, circ) \ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var) \ + if (!(circ)->padding_info[loop_var]) \ + continue; +#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END } STMT_END ; + +/** + * Return a human-readable description for a circuit padding state. + */ +static const char * +circpad_state_to_string(circpad_statenum_t state) +{ + const char *descr; + + switch (state) { + case CIRCPAD_STATE_START: + descr = "START"; + break; + case CIRCPAD_STATE_BURST: + descr = "BURST"; + break; + case CIRCPAD_STATE_GAP: + descr = "GAP"; + break; + case CIRCPAD_STATE_END: + descr = "END"; + break; + default: + descr = "CUSTOM"; // XXX: Just return # in static char buf? 
+ } + + return descr; +} + +/** + * Free the machineinfo at an index + */ +static void +circpad_circuit_machineinfo_free_idx(circuit_t *circ, int idx) +{ + if (circ->padding_info[idx]) { + tor_free(circ->padding_info[idx]->histogram); + timer_free(circ->padding_info[idx]->padding_timer); + tor_free(circ->padding_info[idx]); + } +} + +/** Free all the machineinfos in <b>circ</b> that match <b>machine_num</b>. */ +static void +free_circ_machineinfos_with_machine_num(circuit_t *circ, int machine_num) +{ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + if (circ->padding_machine[i] && + circ->padding_machine[i]->machine_num == machine_num) { + circpad_circuit_machineinfo_free_idx(circ, i); + circ->padding_machine[i] = NULL; + } + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Free all padding machines and mutable info associated with circuit + */ +void +circpad_circuit_free_all_machineinfos(circuit_t *circ) +{ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + circpad_circuit_machineinfo_free_idx(circ, i); + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Allocate a new mutable machineinfo structure. + */ +STATIC circpad_machine_state_t * +circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) +{ + circpad_machine_state_t *mi = + tor_malloc_zero(sizeof(circpad_machine_state_t)); + mi->machine_index = machine_index; + mi->on_circ = on_circ; + + return mi; +} + +/** + * Return the circpad_state_t for the current state based on the + * mutable info. + * + * This function returns NULL when the machine is in the end state or in an + * invalid state. 
+ */ +STATIC const circpad_state_t * +circpad_machine_current_state(const circpad_machine_state_t *mi) +{ + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); + + if (mi->current_state == CIRCPAD_STATE_END) { + return NULL; + } else if (BUG(mi->current_state >= machine->num_states)) { + log_fn(LOG_WARN,LD_CIRC, + "Invalid circuit padding state %d", + mi->current_state); + + return NULL; + } + + return &machine->states[mi->current_state]; +} + +/** + * Calculate the lower bound of a histogram bin. The upper bound + * is obtained by calling this function with bin+1, and subtracting 1. + * + * The 0th bin has a special value -- it only represents start_usec. + * This is so we can specify a probability on 0-delay values. + * + * After bin 0, bins are exponentially spaced, so that each subsequent + * bin is twice as large as the previous. This is done so that higher + * time resolution is given to lower time values. + * + * The infinity bin is the last bin in the array (histogram_len-1). + * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX). + */ +STATIC circpad_delay_t +circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi, + circpad_hist_index_t bin) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + circpad_delay_t start_usec; + + /* Our state should have been checked to be non-null by the caller + * (circpad_machine_remove_token()) */ + if (BUG(state == NULL)) { + return CIRCPAD_DELAY_INFINITE; + } + + if (state->use_rtt_estimate) + start_usec = mi->rtt_estimate_usec+state->start_usec; + else + start_usec = state->start_usec; + + if (bin >= CIRCPAD_INFINITY_BIN(state)) + return CIRCPAD_DELAY_INFINITE; + + if (bin == 0) + return start_usec; + + if (bin == 1) + return start_usec+1; + + /* The bin widths double every index, so that we can have more resolution + * for lower time values in the histogram. 
*/ + const circpad_time_t bin_width_exponent = + 1 << (CIRCPAD_INFINITY_BIN(state) - bin); + return (circpad_delay_t)MIN(start_usec + + state->range_usec/bin_width_exponent, + CIRCPAD_DELAY_INFINITE); +} + +/** Return the midpoint of the histogram bin <b>bin_index</b>. */ +static circpad_delay_t +circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi, + int bin_index) +{ + circpad_delay_t left_bound = circpad_histogram_bin_to_usec(mi, bin_index); + circpad_delay_t right_bound = + circpad_histogram_bin_to_usec(mi, bin_index+1)-1; + + return left_bound + (right_bound - left_bound)/2; +} + +/** + * Return the bin that contains the usec argument. + * "Contains" is defined as us in [lower, upper). + * + * This function will never return the infinity bin (histogram_len-1), + * in order to simplify the rest of the code. + * + * This means that technically the last bin (histogram_len-2) + * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE]. + */ +STATIC circpad_hist_index_t +circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi, + circpad_delay_t usec) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + circpad_delay_t start_usec; + int32_t bin; /* Larger than return type to properly clamp overflow */ + + /* Our state should have been checked to be non-null by the caller + * (circpad_machine_remove_token()) */ + if (BUG(state == NULL)) { + return 0; + } + + if (state->use_rtt_estimate) + start_usec = mi->rtt_estimate_usec+state->start_usec; + else + start_usec = state->start_usec; + + /* The first bin (#0) has zero width and starts (and ends) at start_usec. */ + if (usec <= start_usec) + return 0; + + if (usec == start_usec+1) + return 1; + + const circpad_time_t histogram_range_usec = state->range_usec; + /* We need to find the bin corresponding to our position in the range. + * Since bins are exponentially spaced in powers of two, we need to + * take the log2 of our position in histogram_range_usec. 
However, + * since tor_log2() returns the floor(log2(u64)), we have to adjust + * it to behave like ceil(log2(u64)). This is verified in our tests + * to properly invert the operation done in + * circpad_histogram_bin_to_usec(). */ + bin = CIRCPAD_INFINITY_BIN(state) - + tor_log2(2*histogram_range_usec/(usec-start_usec+1)); + + /* Clamp the return value to account for timevals before the start + * of bin 0, or after the last bin. Don't return the infinity bin + * index. */ + bin = MIN(MAX(bin, 1), CIRCPAD_INFINITY_BIN(state)-1); + return bin; +} + +/** + * This function frees any token bins allocated from a previous state + * + * Called after a state transition, or if the bins are empty. + */ +STATIC void +circpad_machine_setup_tokens(circpad_machine_state_t *mi) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + + /* If this state doesn't exist, or doesn't have token removal, + * free any previous state's histogram, and bail */ + if (!state || state->token_removal == CIRCPAD_TOKEN_REMOVAL_NONE) { + if (mi->histogram) { + tor_free(mi->histogram); + mi->histogram = NULL; + mi->histogram_len = 0; + } + return; + } + + /* Try to avoid re-mallocing if we don't really need to */ + if (!mi->histogram || (mi->histogram + && mi->histogram_len != state->histogram_len)) { + tor_free(mi->histogram); // null ok + mi->histogram = tor_malloc_zero(sizeof(circpad_hist_token_t) + *state->histogram_len); + } + mi->histogram_len = state->histogram_len; + + memcpy(mi->histogram, state->histogram, + sizeof(circpad_hist_token_t)*state->histogram_len); +} + +/** + * Choose a length for this state (in cells), if specified. 
+ */ +static void +circpad_choose_state_length(circpad_machine_state_t *mi) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + double length; + + if (!state || state->length_dist.type == CIRCPAD_DIST_NONE) { + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + return; + } + + length = circpad_distribution_sample(state->length_dist); + length = MAX(0, length); + length += state->start_length; + length = MIN(length, state->max_length); + + mi->state_length = clamp_double_to_int64(length); +} + +/** + * Sample a value from our iat_dist, and clamp it safely + * to circpad_delay_t. + */ +static circpad_delay_t +circpad_distribution_sample_iat_delay(const circpad_state_t *state, + circpad_delay_t start_usec) +{ + double val = circpad_distribution_sample(state->iat_dist); + /* These comparisons are safe, because the output is in the range + * [0, 2**32), and double has a precision of 53 bits. */ + val = MAX(0, val); + val = MIN(val, state->range_usec); + + /* This addition is exact: val is at most 2**32-1, start_usec + * is at most 2**32-1, and doubles have a precision of 53 bits. */ + val += start_usec; + + /* Clamp the distribution at infinite delay val */ + return (circpad_delay_t)MIN(tor_llround(val), CIRCPAD_DELAY_INFINITE); +} + +/** + * Sample an expected time-until-next-packet delay from the histogram. + * + * The bin is chosen with probability proportional to the number + * of tokens in each bin, and then a time value is chosen uniformly from + * that bin's [start,end) time range. 
+ */ +STATIC circpad_delay_t +circpad_machine_sample_delay(circpad_machine_state_t *mi) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + const circpad_hist_token_t *histogram = NULL; + circpad_hist_index_t curr_bin = 0; + circpad_delay_t bin_start, bin_end; + circpad_delay_t start_usec; + /* These three must all be larger than circpad_hist_token_t, because + * we sum several circpad_hist_token_t values across the histogram */ + uint64_t curr_weight = 0; + uint64_t histogram_total_tokens = 0; + uint64_t bin_choice; + + tor_assert(state); + + if (state->use_rtt_estimate) + start_usec = mi->rtt_estimate_usec+state->start_usec; + else + start_usec = state->start_usec; + + if (state->iat_dist.type != CIRCPAD_DIST_NONE) { + /* Sample from a fixed IAT distribution and return */ + return circpad_distribution_sample_iat_delay(state, start_usec); + } else if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) { + /* We have a mutable histogram. Do basic sanity check and apply: */ + if (BUG(!mi->histogram) || + BUG(mi->histogram_len != state->histogram_len)) { + return CIRCPAD_DELAY_INFINITE; + } + + histogram = mi->histogram; + for (circpad_hist_index_t b = 0; b < state->histogram_len; b++) + histogram_total_tokens += histogram[b]; + } else { + /* We have a histogram, but it's immutable */ + histogram = state->histogram; + histogram_total_tokens = state->histogram_total_tokens; + } + + bin_choice = crypto_rand_uint64(histogram_total_tokens); + + /* Skip all the initial zero bins */ + while (!histogram[curr_bin]) { + curr_bin++; + } + curr_weight = histogram[curr_bin]; + + // TODO: This is not constant-time. Pretty sure we don't + // really need it to be, though. 
+ while (curr_weight < bin_choice) { + curr_bin++; + /* It should be impossible to run past the end of the histogram */ + if (BUG(curr_bin >= state->histogram_len)) { + return CIRCPAD_DELAY_INFINITE; + } + curr_weight += histogram[curr_bin]; + } + + /* Do some basic checking of the current bin we are in */ + if (BUG(curr_bin >= state->histogram_len) || + BUG(histogram[curr_bin] == 0)) { + return CIRCPAD_DELAY_INFINITE; + } + + // Store this index to remove the token upon callback. + if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) { + mi->chosen_bin = curr_bin; + } + + if (curr_bin >= CIRCPAD_INFINITY_BIN(state)) { + if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE && + mi->histogram[curr_bin] > 0) { + mi->histogram[curr_bin]--; + } + + // Infinity: Don't send a padding packet. Wait for a real packet + // and then see if our bins are empty or what else we should do. + return CIRCPAD_DELAY_INFINITE; + } + + tor_assert(curr_bin < CIRCPAD_INFINITY_BIN(state)); + + bin_start = circpad_histogram_bin_to_usec(mi, curr_bin); + /* We don't need to deduct 1 from the upper bound because the random range + * function below samples from [bin_start, bin_end) */ + bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1); + + /* Truncate the high bin in case it's the infinity bin: + * Don't actually schedule an "infinite"-1 delay */ + bin_end = MIN(bin_end, start_usec+state->range_usec); + + // Sample uniformly between histogram[i] to histogram[i+1]-1, + // but no need to sample if they are the same timeval (aka bin 0 or bin 1). + if (bin_end <= bin_start+1) + return bin_start; + else + return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end); +} + +/** + * Sample a value from the specified probability distribution. + * + * This performs inverse transform sampling + * (https://en.wikipedia.org/wiki/Inverse_transform_sampling). + * + * XXX: These formulas were taken verbatim. 
Need a floating-point wizard + * to check them for catastrophic cancellation and other issues (teor?). + * Also: is 32bits of double from [0.0,1.0) enough? + */ +static double +circpad_distribution_sample(circpad_distribution_t dist) +{ + log_fn(LOG_DEBUG,LD_CIRC, "Sampling delay with distribution %d", + dist.type); + + switch (dist.type) { + case CIRCPAD_DIST_NONE: + { + /* We should not get in here like this */ + tor_assert_nonfatal_unreached(); + return 0; + } + case CIRCPAD_DIST_UNIFORM: + { + // param2 is upper bound, param1 is lower + const struct uniform my_uniform = { + .base = UNIFORM(my_uniform), + .a = dist.param1, + .b = dist.param2, + }; + return dist_sample(&my_uniform.base); + } + case CIRCPAD_DIST_LOGISTIC: + { + /* param1 is Mu, param2 is sigma. */ + const struct logistic my_logistic = { + .base = LOGISTIC(my_logistic), + .mu = dist.param1, + .sigma = dist.param2, + }; + return dist_sample(&my_logistic.base); + } + case CIRCPAD_DIST_LOG_LOGISTIC: + { + /* param1 is Alpha, param2 is 1.0/Beta */ + const struct log_logistic my_log_logistic = { + .base = LOG_LOGISTIC(my_log_logistic), + .alpha = dist.param1, + .beta = dist.param2, + }; + return dist_sample(&my_log_logistic.base); + } + case CIRCPAD_DIST_GEOMETRIC: + { + /* param1 is 'p' (success probability) */ + const struct geometric my_geometric = { + .base = GEOMETRIC(my_geometric), + .p = dist.param1, + }; + return dist_sample(&my_geometric.base); + } + case CIRCPAD_DIST_WEIBULL: + { + /* param1 is k, param2 is Lambda */ + const struct weibull my_weibull = { + .base = WEIBULL(my_weibull), + .k = dist.param1, + .lambda = dist.param2, + }; + return dist_sample(&my_weibull.base); + } + case CIRCPAD_DIST_PARETO: + { + /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */ + const struct genpareto my_genpareto = { + .base = GENPARETO(my_genpareto), + .mu = 0, + .sigma = dist.param1, + .xi = dist.param2, + }; + return dist_sample(&my_genpareto.base); + } + } + + tor_assert_nonfatal_unreached(); 
+ return 0; +} + +/** + * Find the index of the first bin whose upper bound is + * greater than the target, and that has tokens remaining. + */ +static circpad_hist_index_t +circpad_machine_first_higher_index(const circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, + target_bin_usec); + + /* Don't remove from the infinity bin */ + for (; bin < CIRCPAD_INFINITY_BIN(mi); bin++) { + if (mi->histogram[bin] && + circpad_histogram_bin_to_usec(mi, bin+1) > target_bin_usec) { + return bin; + } + } + + return mi->histogram_len; +} + +/** + * Find the index of the first bin whose lower bound is lower or equal to + * <b>target_bin_usec</b>, and that still has tokens remaining. + */ +static circpad_hist_index_t +circpad_machine_first_lower_index(const circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, + target_bin_usec); + + for (; bin >= 0; bin--) { + if (mi->histogram[bin] && + circpad_histogram_bin_to_usec(mi, bin) <= target_bin_usec) { + return bin; + } + } + + return -1; +} + +/** + * Remove a token from the first non-empty bin whose upper bound is + * greater than the target. + */ +STATIC void +circpad_machine_remove_higher_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec) +{ + /* We need to remove the token from the first bin + * whose upper bound is greater than the target, and that + * has tokens remaining. */ + circpad_hist_index_t bin = circpad_machine_first_higher_index(mi, + target_bin_usec); + + if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) { + if (!BUG(mi->histogram[bin] == 0)) { + mi->histogram[bin]--; + } + } +} + +/** + * Remove a token from the first non-empty bin whose upper bound is + * lower than the target. 
+ */ +STATIC void +circpad_machine_remove_lower_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_machine_first_lower_index(mi, + target_bin_usec); + + if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) { + if (!BUG(mi->histogram[bin] == 0)) { + mi->histogram[bin]--; + } + } +} + +/* Helper macro: Ensure that the bin has tokens available, and BUG out of the + * function if it's not the case. */ +#define ENSURE_BIN_CAPACITY(bin_index) \ + if (BUG(mi->histogram[bin_index] == 0)) { \ + return; \ + } + +/** + * Remove a token from the closest non-empty bin to the target. + * + * If use_usec is true, measure "closest" in terms of the next closest bin + * midpoint. + * + * If it is false, use bin index distance only. + */ +STATIC void +circpad_machine_remove_closest_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec, + bool use_usec) +{ + circpad_hist_index_t lower, higher, current; + circpad_hist_index_t bin_to_remove = -1; + + lower = circpad_machine_first_lower_index(mi, target_bin_usec); + higher = circpad_machine_first_higher_index(mi, target_bin_usec); + current = circpad_histogram_usec_to_bin(mi, target_bin_usec); + + /* Sanity check the results */ + if (BUG(lower > current) || BUG(higher < current)) { + return; + } + + /* Take care of edge cases first */ + if (higher == mi->histogram_len && lower == -1) { + /* All bins are empty */ + return; + } else if (higher == mi->histogram_len) { + /* All higher bins are empty */ + ENSURE_BIN_CAPACITY(lower); + mi->histogram[lower]--; + return; + } else if (lower == -1) { + /* All lower bins are empty */ + ENSURE_BIN_CAPACITY(higher); + mi->histogram[higher]--; + return; + } + + /* Now handle the intermediate cases */ + if (use_usec) { + /* Find the closest bin midpoint to the target */ + circpad_delay_t lower_usec = circpad_get_histogram_bin_midpoint(mi, lower); + circpad_delay_t higher_usec = + circpad_get_histogram_bin_midpoint(mi, higher); + + if 
(target_bin_usec < lower_usec) { + // Lower bin is closer + ENSURE_BIN_CAPACITY(lower); + bin_to_remove = lower; + } else if (target_bin_usec > higher_usec) { + // Higher bin is closer + ENSURE_BIN_CAPACITY(higher); + bin_to_remove = higher; + } else if (target_bin_usec-lower_usec > higher_usec-target_bin_usec) { + // Higher bin is closer + ENSURE_BIN_CAPACITY(higher); + bin_to_remove = higher; + } else { + // Lower bin is closer + ENSURE_BIN_CAPACITY(lower); + bin_to_remove = lower; + } + mi->histogram[bin_to_remove]--; + log_debug(LD_GENERAL, "Removing token from bin %d", bin_to_remove); + return; + } else { + if (current - lower > higher - current) { + // Higher bin is closer + ENSURE_BIN_CAPACITY(higher); + mi->histogram[higher]--; + return; + } else { + // Lower bin is closer + ENSURE_BIN_CAPACITY(lower); + mi->histogram[lower]--; + return; + } + } +} + +#undef ENSURE_BIN_CAPACITY + +/** + * Remove a token from the exact bin corresponding to the target. + * + * If it is empty, do nothing. + */ +static void +circpad_machine_remove_exact(circpad_machine_state_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, + target_bin_usec); + + if (mi->histogram[bin] > 0) + mi->histogram[bin]--; +} + +/** + * Check our state's cell limit count and tokens. + * + * Returns 1 if either limits are hit and we decide to change states, + * otherwise returns 0. + */ +static circpad_decision_t +check_machine_token_supply(circpad_machine_state_t *mi) +{ + uint32_t histogram_total_tokens = 0; + + /* Check if bins empty. This requires summing up the current mutable + * machineinfo histogram token total and checking if it is zero. + * Machineinfo does not keep a running token count. We're assuming the + * extra space is not worth this short loop iteration. + * + * We also do not count infinity bin in histogram totals. 
+ */ + if (mi->histogram_len && mi->histogram) { + for (circpad_hist_index_t b = 0; b < CIRCPAD_INFINITY_BIN(mi); b++) + histogram_total_tokens += mi->histogram[b]; + + /* If we change state, we're done */ + if (histogram_total_tokens == 0) { + if (circpad_internal_event_bins_empty(mi) == CIRCPAD_STATE_CHANGED) + return CIRCPAD_STATE_CHANGED; + } + } + + if (mi->state_length == 0) { + return circpad_internal_event_state_length_up(mi); + } + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * Remove a token from the bin corresponding to the delta since + * last packet. If that bin is empty, choose a token based on + * the specified removal strategy in the state machine. + * + * This function also updates and checks rate limit and state + * limit counters. + * + * Returns 1 if we transition states, 0 otherwise. + */ +STATIC circpad_decision_t +circpad_machine_remove_token(circpad_machine_state_t *mi) +{ + const circpad_state_t *state = NULL; + circpad_time_t current_time; + circpad_delay_t target_bin_usec; + + /* Update non-padding counts for rate limiting: We scale at UINT16_MAX + * because we only use this for a percentile limit of 2 sig figs, and + * space is scare in the machineinfo struct. */ + mi->nonpadding_sent++; + if (mi->nonpadding_sent == UINT16_MAX) { + mi->padding_sent /= 2; + mi->nonpadding_sent /= 2; + } + + /* Dont remove any tokens if there was no padding scheduled */ + if (!mi->padding_scheduled_at_usec) { + return CIRCPAD_STATE_UNCHANGED; + } + + state = circpad_machine_current_state(mi); + current_time = monotime_absolute_usec(); + + /* If we have scheduled padding some time in the future, we want to see what + bin we are in at the current time */ + target_bin_usec = (circpad_delay_t) + MIN((current_time - mi->padding_scheduled_at_usec), + CIRCPAD_DELAY_INFINITE-1); + + /* We are treating this non-padding cell as a padding cell, so we cancel + padding timer, if present. 
*/ + mi->padding_scheduled_at_usec = 0; + if (mi->is_padding_timer_scheduled) { + mi->is_padding_timer_scheduled = 0; + timer_disable(mi->padding_timer); + } + + /* If we are not in a padding state (like start or end), we're done */ + if (!state) + return CIRCPAD_STATE_UNCHANGED; + + /* If we're enforcing a state length on non-padding packets, + * decrement it */ + if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE && + state->length_includes_nonpadding && + mi->state_length > 0) { + mi->state_length--; + } + + /* Perform the specified token removal strategy */ + switch (state->token_removal) { + case CIRCPAD_TOKEN_REMOVAL_NONE: + break; + case CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC: + circpad_machine_remove_closest_token(mi, target_bin_usec, 1); + break; + case CIRCPAD_TOKEN_REMOVAL_CLOSEST: + circpad_machine_remove_closest_token(mi, target_bin_usec, 0); + break; + case CIRCPAD_TOKEN_REMOVAL_LOWER: + circpad_machine_remove_lower_token(mi, target_bin_usec); + break; + case CIRCPAD_TOKEN_REMOVAL_HIGHER: + circpad_machine_remove_higher_token(mi, target_bin_usec); + break; + case CIRCPAD_TOKEN_REMOVAL_EXACT: + circpad_machine_remove_exact(mi, target_bin_usec); + break; + } + + /* Check our token and state length limits */ + return check_machine_token_supply(mi); +} + +/** + * Send a relay command with a relay cell payload on a circuit to + * the particular hopnum. + * + * Hopnum starts at 1 (1=guard, 2=middle, 3=exit, etc). + * + * Payload may be null. + * + * Returns negative on error, 0 on success. 
+ */ +MOCK_IMPL(STATIC signed_error_t, +circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len)) +{ + crypt_path_t *target_hop = circuit_get_cpath_hop(circ, hopnum); + signed_error_t ret; + + /* Check that the cpath has the target hop */ + if (!target_hop) { + log_fn(LOG_WARN, LD_BUG, "Padding circuit %u has %d hops, not %d", + circ->global_identifier, circuit_get_cpath_len(circ), hopnum); + return -1; + } + + /* Check that the target hop is opened */ + if (target_hop->state != CPATH_STATE_OPEN) { + log_fn(LOG_WARN,LD_CIRC, + "Padding circuit %u has %d hops, not %d", + circ->global_identifier, + circuit_get_cpath_opened_len(circ), hopnum); + return -1; + } + + /* Send the drop command to the second hop */ + ret = relay_send_command_from_edge(0, TO_CIRCUIT(circ), relay_command, + (const char*)payload, payload_len, + target_hop); + return ret; +} + +/** + * Callback helper to send a padding cell. + * + * This helper is called after our histogram-sampled delay period passes + * without another packet being sent first. If a packet is sent before this + * callback happens, it is canceled. So when we're called here, send padding + * right away. + * + * If sending this padding cell forced us to transition states return + * CIRCPAD_STATE_CHANGED. Otherwise return CIRCPAD_STATE_UNCHANGED. + */ +circpad_decision_t +circpad_send_padding_cell_for_callback(circpad_machine_state_t *mi) +{ + circuit_t *circ = mi->on_circ; + int machine_idx = mi->machine_index; + mi->padding_scheduled_at_usec = 0; + circpad_statenum_t state = mi->current_state; + + // Make sure circuit didn't close on us + if (mi->on_circ->marked_for_close) { + log_fn(LOG_INFO,LD_CIRC, + "Padding callback on a circuit marked for close. 
Ignoring."); + return CIRCPAD_STATE_CHANGED; + } + + /* If it's a histogram, reduce the token count */ + if (mi->histogram && mi->histogram_len) { + /* Basic sanity check on the histogram before removing anything */ + if (BUG(mi->chosen_bin >= mi->histogram_len) || + BUG(mi->histogram[mi->chosen_bin] == 0)) { + return CIRCPAD_STATE_CHANGED; + } + + mi->histogram[mi->chosen_bin]--; + } + + /* If we have a valid state length bound, consider it */ + if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE && + !BUG(mi->state_length <= 0)) { + mi->state_length--; + } + + /* + * Update non-padding counts for rate limiting: We scale at UINT16_MAX + * because we only use this for a percentile limit of 2 sig figs, and + * space is scare in the machineinfo struct. + */ + mi->padding_sent++; + if (mi->padding_sent == UINT16_MAX) { + mi->padding_sent /= 2; + mi->nonpadding_sent /= 2; + } + circpad_global_padding_sent++; + + if (CIRCUIT_IS_ORIGIN(mi->on_circ)) { + circpad_send_command_to_hop(TO_ORIGIN_CIRCUIT(mi->on_circ), + CIRCPAD_GET_MACHINE(mi)->target_hopnum, + RELAY_COMMAND_DROP, NULL, 0); + log_fn(LOG_INFO,LD_CIRC, "Callback: Sending padding to origin circuit %u.", + TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier); + } else { + // If we're a non-origin circ, we can just send from here as if we're the + // edge. + log_fn(LOG_INFO,LD_CIRC, + "Callback: Sending padding to non-origin circuit."); + relay_send_command_from_edge(0, mi->on_circ, RELAY_COMMAND_DROP, NULL, + 0, NULL); + } + + rep_hist_padding_count_write(PADDING_TYPE_DROP); + /* This is a padding cell sent from the client or from the middle node, + * (because it's invoked from circuitpadding.c) */ + circpad_cell_event_padding_sent(circ); + + /* The circpad_cell_event_padding_sent() could cause us to transition. + * Check that we still have a padding machineinfo, and then check our token + * supply. 
*/ + if (circ->padding_info[machine_idx] != NULL) { + if (state != circ->padding_info[machine_idx]->current_state) + return CIRCPAD_STATE_CHANGED; + else + return check_machine_token_supply(circ->padding_info[machine_idx]); + } else { + return CIRCPAD_STATE_CHANGED; + } +} + +/** + * Tor-timer compatible callback that tells us to send a padding cell. + * + * Timers are associated with circpad_machine_state_t's. When the machineinfo + * is freed on a circuit, the timers are cancelled. Since the lifetime + * of machineinfo is always longer than the timers, handles are not + * needed. + */ +static void +circpad_send_padding_callback(tor_timer_t *timer, void *args, + const struct monotime_t *time) +{ + circpad_machine_state_t *mi = ((circpad_machine_state_t*)args); + (void)timer; (void)time; + + if (mi && mi->on_circ) { + assert_circuit_ok(mi->on_circ); + circpad_send_padding_cell_for_callback(mi); + } else { + // This shouldn't happen (represents a timer leak) + log_fn(LOG_WARN,LD_CIRC, + "Circuit closed while waiting for padding timer."); + tor_fragile_assert(); + } + + // TODO-MP-AP: Unify this counter with channelpadding for rephist stats + //total_timers_pending--; +} + +/** + * Cache our consensus parameters upon consensus update. + */ +void +circpad_new_consensus_params(const networkstatus_t *ns) +{ + circpad_global_allowed_cells = + networkstatus_get_param(ns, "circpad_global_allowed_cells", + 0, 0, UINT16_MAX-1); + + circpad_global_max_padding_percent = + networkstatus_get_param(ns, "circpad_global_max_padding_pct", + 0, 0, 100); +} + +/** + * Check this machine against its padding limits, as well as global + * consensus limits. + * + * We have two limits: a percent and a cell count. The cell count + * limit must be reached before the percent is enforced (this is to + * optionally allow very light padding of things like circuit setup + * while there is no other traffic on the circuit). + * + * TODO: Don't apply limits to machines form torrc. 
+ * + * Returns 1 if limits are set and we've hit them. Otherwise returns 0. + */ +STATIC bool +circpad_machine_reached_padding_limit(circpad_machine_state_t *mi) +{ + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); + + /* If machine_padding_pct is non-zero, and we've sent more + * than the allowed count of padding cells, then check our + * percent limits for this machine. */ + if (machine->max_padding_percent && + mi->padding_sent >= machine->allowed_padding_count) { + uint32_t total_cells = mi->padding_sent + mi->nonpadding_sent; + + /* Check the percent */ + if ((100*(uint32_t)mi->padding_sent) / total_cells > + machine->max_padding_percent) { + return 1; // limit is reached. Stop. + } + } + + /* If circpad_max_global_padding_pct is non-zero, and we've + * sent more than the global padding cell limit, then check our + * gloabl tor process percentage limit on padding. */ + if (circpad_global_max_padding_percent && + circpad_global_padding_sent >= circpad_global_allowed_cells) { + uint64_t total_cells = circpad_global_padding_sent + + circpad_global_nonpadding_sent; + + /* Check the percent */ + if ((100*circpad_global_padding_sent) / total_cells > + circpad_global_max_padding_percent) { + return 1; // global limit reached. Stop. + } + } + + return 0; // All good! +} + +/** + * Schedule the next padding time according to the machineinfo on a + * circuit. + * + * The histograms represent inter-packet-delay. Whenever you get an packet + * event you should be scheduling your next timer (after cancelling any old + * ones and updating tokens accordingly). + * + * Returns 1 if we decide to transition states (due to infinity bin), + * 0 otherwise. + */ +MOCK_IMPL(circpad_decision_t, +circpad_machine_schedule_padding,(circpad_machine_state_t *mi)) +{ + circpad_delay_t in_usec = 0; + struct timeval timeout; + tor_assert(mi); + + // Don't pad in end (but also don't cancel any previously + // scheduled padding either). 
+ if (mi->current_state == CIRCPAD_STATE_END) { + log_fn(LOG_INFO, LD_CIRC, "Padding end state"); + return CIRCPAD_STATE_UNCHANGED; + } + + /* Check our padding limits */ + if (circpad_machine_reached_padding_limit(mi)) { + if (CIRCUIT_IS_ORIGIN(mi->on_circ)) { + log_fn(LOG_INFO, LD_CIRC, + "Padding machine has reached padding limit on circuit %u", + TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier); + } else { + log_fn(LOG_INFO, LD_CIRC, + "Padding machine has reached padding limit on circuit %"PRIu64 + ", %d", + mi->on_circ->n_chan ? mi->on_circ->n_chan->global_identifier : 0, + mi->on_circ->n_circ_id); + } + return CIRCPAD_STATE_UNCHANGED; + } + + if (mi->is_padding_timer_scheduled) { + /* Cancel current timer (if any) */ + timer_disable(mi->padding_timer); + mi->is_padding_timer_scheduled = 0; + } + + /* in_usec = in microseconds */ + in_usec = circpad_machine_sample_delay(mi); + mi->padding_scheduled_at_usec = monotime_absolute_usec(); + log_fn(LOG_INFO,LD_CIRC,"\tPadding in %u usec", in_usec); + + // Don't schedule if we have infinite delay. + if (in_usec == CIRCPAD_DELAY_INFINITE) { + return circpad_internal_event_infinity(mi); + } + + if (mi->state_length == 0) { + /* If we're at length 0, that means we hit 0 after sending + * a cell earlier, and emitted an event for it, but + * for whatever reason we did not decide to change states then. + * So maybe the machine is waiting for bins empty, or for an + * infinity event later? That would be a strange machine, + * but there's no reason to make it impossible. 
*/ + return CIRCPAD_STATE_UNCHANGED; + } + + if (in_usec <= 0) { + return circpad_send_padding_cell_for_callback(mi); + } + + timeout.tv_sec = in_usec/TOR_USEC_PER_SEC; + timeout.tv_usec = (in_usec%TOR_USEC_PER_SEC); + + log_fn(LOG_INFO, LD_CIRC, "\tPadding in %u sec, %u usec", + (unsigned)timeout.tv_sec, (unsigned)timeout.tv_usec); + + if (mi->padding_timer) { + timer_set_cb(mi->padding_timer, circpad_send_padding_callback, mi); + } else { + mi->padding_timer = + timer_new(circpad_send_padding_callback, mi); + } + timer_schedule(mi->padding_timer, &timeout); + mi->is_padding_timer_scheduled = 1; + + // TODO-MP-AP: Unify with channelpadding counter + //rep_hist_padding_count_timers(++total_timers_pending); + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * If the machine transitioned to the END state, we need + * to check to see if it wants us to shut it down immediately. + * If it does, then we need to send the appropate negotation commands + * depending on which side it is. + * + * After this function is called, mi may point to freed memory. Do + * not access it. + */ +static void +circpad_machine_spec_transitioned_to_end(circpad_machine_state_t *mi) +{ + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); + + /* + * We allow machines to shut down and delete themselves as opposed + * to just going back to START or waiting forever in END so that + * we can handle the case where this machine started while it was + * the only machine that matched conditions, but *since* then more + * "higher ranking" machines now match the conditions, and would + * be given a chance to take precidence over this one in + * circpad_add_matching_machines(). + * + * Returning to START or waiting forever in END would not give those + * other machines a chance to be launched, where as shutting down + * here does. 
+ */ + if (machine->should_negotiate_end) { + circuit_t *on_circ = mi->on_circ; + if (machine->is_origin_side) { + /* We free the machine info here so that we can be replaced + * by a different machine. But we must leave the padding_machine + * in place to wait for the negotiated response */ + circpad_circuit_machineinfo_free_idx(on_circ, + machine->machine_index); + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(on_circ), + machine->machine_num, + machine->target_hopnum, + CIRCPAD_COMMAND_STOP); + } else { + circpad_circuit_machineinfo_free_idx(on_circ, + machine->machine_index); + circpad_padding_negotiated(on_circ, + machine->machine_num, + CIRCPAD_COMMAND_STOP, + CIRCPAD_RESPONSE_OK); + on_circ->padding_machine[machine->machine_index] = NULL; + } + } +} + +/** + * Generic state transition function for padding state machines. + * + * Given an event and our mutable machine info, decide if/how to + * transition to a different state, and perform actions accordingly. + * + * Returns 1 if we transition states, 0 otherwise. + */ +MOCK_IMPL(circpad_decision_t, +circpad_machine_spec_transition,(circpad_machine_state_t *mi, + circpad_event_t event)) +{ + const circpad_state_t *state = + circpad_machine_current_state(mi); + + /* If state is null we are in the end state. */ + if (!state) { + /* If we in end state we don't pad no matter what. 
*/ + return CIRCPAD_STATE_UNCHANGED; + } + + /* Check if this event is ignored or causes a cancel */ + if (state->next_state[event] == CIRCPAD_STATE_IGNORE) { + return CIRCPAD_STATE_UNCHANGED; + } else if (state->next_state[event] == CIRCPAD_STATE_CANCEL) { + /* Check cancel events and cancel any pending padding */ + mi->padding_scheduled_at_usec = 0; + if (mi->is_padding_timer_scheduled) { + mi->is_padding_timer_scheduled = 0; + /* Cancel current timer (if any) */ + timer_disable(mi->padding_timer); + } + return CIRCPAD_STATE_UNCHANGED; + } else { + circpad_statenum_t s = state->next_state[event]; + /* See if we need to transition to any other states based on this event. + * Whenever a transition happens, even to our own state, we schedule + * padding. + * + * So if a state only wants to schedule padding for an event, it specifies + * a transition to itself. All non-specified events are ignored. + */ + log_fn(LOG_INFO, LD_CIRC, + "Circpad machine %d transitioning from %s to %s", + mi->machine_index, circpad_state_to_string(mi->current_state), + circpad_state_to_string(s)); + + /* If this is not the same state, switch and init tokens, + * otherwise just reschedule padding. */ + if (mi->current_state != s) { + mi->current_state = s; + circpad_machine_setup_tokens(mi); + circpad_choose_state_length(mi); + + /* If we transition to the end state, check to see + * if this machine wants to be shut down at end */ + if (s == CIRCPAD_STATE_END) { + circpad_machine_spec_transitioned_to_end(mi); + /* We transitioned but we don't pad in end. Also, mi + * may be freed. Returning STATE_CHANGED prevents us + * from accessing it in any callers of this function. */ + return CIRCPAD_STATE_CHANGED; + } + + /* We transitioned to a new state, schedule padding */ + circpad_machine_schedule_padding(mi); + return CIRCPAD_STATE_CHANGED; + } + + /* We transitioned back to the same state. Schedule padding, + * and inform if that causes a state transition. 
*/ + return circpad_machine_schedule_padding(mi); + } + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * Estimate the circuit RTT from the current middle hop out to the + * end of the circuit. + * + * We estimate RTT by calculating the time between "receive" and + * "send" at a middle hop. This is because we "receive" a cell + * from the origin, and then relay it towards the exit before a + * response comes back. It is that response time from the exit side + * that we want to measure, so that we can make use of it for synthetic + * response delays. + */ +static void +circpad_estimate_circ_rtt_on_received(circuit_t *circ, + circpad_machine_state_t *mi) +{ + /* Origin circuits don't estimate RTT. They could do it easily enough, + * but they have no reason to use it in any delay calculations. */ + if (CIRCUIT_IS_ORIGIN(circ) || mi->stop_rtt_update) + return; + + /* If we already have a last receieved packet time, that means we + * did not get a response before this packet. The RTT estimate + * only makes sense if we do not have multiple packets on the + * wire, so stop estimating if this is the second packet + * back to back. However, for the first set of back-to-back + * packets, we can wait until the very first response comes back + * to us, to measure that RTT (for the response to optimistic + * data, for example). Hence stop_rtt_update is only checked + * in this received side function, and not in send side below. + */ + if (mi->last_received_time_usec) { + /* We also allow multiple back-to-back packets if the circuit is not + * opened, to handle var cells. + * XXX: Will this work with out var cell plans? Maybe not, + * since we're opened at the middle hop as soon as we process + * one var extend2 :/ */ + if (circ->state == CIRCUIT_STATE_OPEN) { + log_fn(LOG_INFO, LD_CIRC, + "Stopping padding RTT estimation on circuit (%"PRIu64 + ", %d) after two back to back packets. Current RTT: %d", + circ->n_chan ? 
circ->n_chan->global_identifier : 0, + circ->n_circ_id, mi->rtt_estimate_usec); + mi->stop_rtt_update = 1; + } + } else { + mi->last_received_time_usec = monotime_absolute_usec(); + } +} + +/** + * Handles the "send" side of RTT calculation at middle nodes. + * + * This function calculates the RTT from the middle to the end + * of the circuit by subtracting the last received cell timestamp + * from the current time. It allows back-to-back cells until + * the circuit is opened, to allow for var cell handshakes. + * XXX: Check our var cell plans to make sure this will work. + */ +static void +circpad_estimate_circ_rtt_on_send(circuit_t *circ, + circpad_machine_state_t *mi) +{ + /* Origin circuits don't estimate RTT. They could do it easily enough, + * but they have no reason to use it in any delay calculations. */ + if (CIRCUIT_IS_ORIGIN(circ)) + return; + + /* If last_received_time_usec is non-zero, we are waiting for a response + * from the exit side. Calculate the time delta and use it as RTT. */ + if (mi->last_received_time_usec) { + circpad_time_t rtt_time = monotime_absolute_usec() - + mi->last_received_time_usec; + + /* Reset the last RTT packet time, so we can tell if two cells + * arrive back to back */ + mi->last_received_time_usec = 0; + + /* Use INT32_MAX to ensure the addition doesn't overflow */ + if (rtt_time >= INT32_MAX) { + log_fn(LOG_WARN,LD_CIRC, + "Circuit padding RTT estimate overflowed: %"PRIu64 + " vs %"PRIu64, monotime_absolute_usec(), + mi->last_received_time_usec); + return; + } + + /* If the old RTT estimate is lower than this one, use this one, because + * the circuit is getting longer. If this estimate is somehow + * faster than the previous, then maybe that was network jitter. + * In that case, average them. 
*/ + if (mi->rtt_estimate_usec < (circpad_delay_t)rtt_time) { + mi->rtt_estimate_usec = (circpad_delay_t)rtt_time; + } else { + mi->rtt_estimate_usec += (circpad_delay_t)rtt_time; + mi->rtt_estimate_usec /= 2; + } + } else if (circ->state == CIRCUIT_STATE_OPEN) { + /* If last_received_time_usec is zero, then we have gotten two cells back + * to back. Stop estimating RTT in this case. Note that we only + * stop RTT update if the circuit is opened, to allow for RTT estimates + * of var cells during circ setup. */ + mi->stop_rtt_update = 1; + + if (!mi->rtt_estimate_usec) { + log_fn(LOG_NOTICE, LD_CIRC, + "Got two cells back to back on a circuit before estimating RTT."); + } + } +} + +/** + * A "non-padding" cell has been sent from this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we sent a cell into the network. + * For middle relay circuits, this means we sent a cell towards the + * origin. + */ +void +circpad_cell_event_nonpadding_sent(circuit_t *on_circ) +{ + /* Update global cell count */ + circpad_global_nonpadding_sent++; + + /* If there are no machines then this loop should not iterate */ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + /* First, update any RTT estimate */ + circpad_estimate_circ_rtt_on_send(on_circ, on_circ->padding_info[i]); + + /* Remove a token: this is the idea of adaptive padding, since we have an + * ideal distribution that we want our distribution to look like. */ + if (!circpad_machine_remove_token(on_circ->padding_info[i])) { + /* If removing a token did not cause a transition, check if + * non-padding sent event should */ + circpad_machine_spec_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_NONPADDING_SENT); + } + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A "non-padding" cell has been received by this endpoint. React + * according to any padding state machines on the circuit. 
+ * + * For origin circuits, this means we read a cell from the network. + * For middle relay circuits, this means we received a cell from the + * origin. + */ +void +circpad_cell_event_nonpadding_received(circuit_t *on_circ) +{ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + /* First, update any RTT estimate */ + circpad_estimate_circ_rtt_on_received(on_circ, on_circ->padding_info[i]); + + circpad_machine_spec_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_NONPADDING_RECV); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A padding cell has been sent from this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we sent a cell into the network. + * For middle relay circuits, this means we sent a cell towards the + * origin. + */ +void +circpad_cell_event_padding_sent(circuit_t *on_circ) +{ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + circpad_machine_spec_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_PADDING_SENT); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A padding cell has been received by this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we read a cell from the network. + * For middle relay circuits, this means we received a cell from the + * origin. + */ +void +circpad_cell_event_padding_received(circuit_t *on_circ) +{ + /* identical to padding sent */ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + circpad_machine_spec_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_PADDING_RECV); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * An "infinite" delay has ben chosen from one of our histograms. + * + * "Infinite" delays mean don't send padding -- but they can also + * mean transition to another state depending on the state machine + * definitions. Check the rules and react accordingly. + * + * Return 1 if we decide to transition, 0 otherwise. 
+ */ +circpad_decision_t +circpad_internal_event_infinity(circpad_machine_state_t *mi) +{ + return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_INFINITY); +} + +/** + * All of the bins of our current state's histogram's are empty. + * + * Check to see if this means transition to another state, and if + * not, refill the tokens. + * + * Return 1 if we decide to transition, 0 otherwise. + */ +circpad_decision_t +circpad_internal_event_bins_empty(circpad_machine_state_t *mi) +{ + if (circpad_machine_spec_transition(mi, CIRCPAD_EVENT_BINS_EMPTY) + == CIRCPAD_STATE_CHANGED) { + return CIRCPAD_STATE_CHANGED; + } else { + /* If we dont transition, then we refill the tokens */ + circpad_machine_setup_tokens(mi); + return CIRCPAD_STATE_UNCHANGED; + } +} + +/** + * This state has used up its cell count. Emit the event and + * see if we transition. + * + * Return 1 if we decide to transition, 0 otherwise. + */ +circpad_decision_t +circpad_internal_event_state_length_up(circpad_machine_state_t *mi) +{ + return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT); +} + +/** + * Returns true if the circuit matches the conditions. + */ +static inline bool +circpad_machine_conditions_met(origin_circuit_t *circ, + const circpad_machine_spec_t *machine) +{ + if (!(circpad_circ_purpose_to_mask(TO_CIRCUIT(circ)->purpose) + & machine->conditions.purpose_mask)) + return 0; + + if (machine->conditions.requires_vanguards) { + const or_options_t *options = get_options(); + + /* Pinned middles are effectively vanguards */ + if (!(options->HSLayer2Nodes || options->HSLayer3Nodes)) + return 0; + } + + /* We check for any bits set in the circuit state mask so that machines + * can say any of the following through their state bitmask: + * "I want to apply to circuits with either streams or no streams"; OR + * "I only want to apply to circuits with streams"; OR + * "I only want to apply to circuits without streams". 
*/ + if (!(circpad_circuit_state(circ) & machine->conditions.state_mask)) + return 0; + + if (circuit_get_cpath_opened_len(circ) < machine->conditions.min_hops) + return 0; + + return 1; +} + +/** + * Returns a minimized representation of the circuit state. + * + * The padding code only cares if the circuit is building, + * opened, used for streams, and/or still has relay early cells. + * This returns a bitmask of all state properities that apply to + * this circuit. + */ +static inline +circpad_circuit_state_t +circpad_circuit_state(origin_circuit_t *circ) +{ + circpad_circuit_state_t retmask = 0; + + if (circ->p_streams) + retmask |= CIRCPAD_CIRC_STREAMS; + else + retmask |= CIRCPAD_CIRC_NO_STREAMS; + + /* We use has_opened to prevent cannibialized circs from flapping. */ + if (circ->has_opened) + retmask |= CIRCPAD_CIRC_OPENED; + else + retmask |= CIRCPAD_CIRC_BUILDING; + + if (circ->remaining_relay_early_cells > 0) + retmask |= CIRCPAD_CIRC_HAS_RELAY_EARLY; + else + retmask |= CIRCPAD_CIRC_HAS_NO_RELAY_EARLY; + + return retmask; +} + +/** + * Convert a normal circuit purpose into a bitmask that we can + * use for determining matching circuits. + */ +static inline +circpad_purpose_mask_t +circpad_circ_purpose_to_mask(uint8_t circ_purpose) +{ + /* Treat OR circ purposes as ignored. They should not be passed here*/ + if (BUG(circ_purpose <= CIRCUIT_PURPOSE_OR_MAX_)) { + return 0; + } + + /* Treat new client circuit purposes as "OMG ITS EVERYTHING". + * This also should not happen */ + if (BUG(circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1 > 32)) { + return CIRCPAD_PURPOSE_ALL; + } + + /* Convert the purpose to a bit position */ + return 1 << (circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1); +} + +/** + * Shut down any machines whose conditions no longer match + * the current circuit. 
+ */ +static void +circpad_shutdown_old_machines(origin_circuit_t *on_circ) +{ + circuit_t *circ = TO_CIRCUIT(on_circ); + + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, circ) { + if (!circpad_machine_conditions_met(on_circ, + circ->padding_machine[i])) { + // Clear machineinfo (frees timers) + circpad_circuit_machineinfo_free_idx(circ, i); + // Send padding negotiate stop + circpad_negotiate_padding(on_circ, + circ->padding_machine[i]->machine_num, + circ->padding_machine[i]->target_hopnum, + CIRCPAD_COMMAND_STOP); + } + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * Negotiate new machines that would apply to this circuit. + * + * This function checks to see if we have any free machine indexes, + * and for each free machine index, it initializes the most recently + * added origin-side padding machine that matches the target machine + * index and circuit conditions, and negotiates it with the appropriate + * middle relay. + */ +static void +circpad_add_matching_machines(origin_circuit_t *on_circ) +{ + circuit_t *circ = TO_CIRCUIT(on_circ); + +#ifdef TOR_UNIT_TESTS + /* Tests don't have to init our padding machines */ + if (!origin_padding_machines) + return; +#endif + + /* If padding negotiation failed before, do not try again */ + if (on_circ->padding_negotiation_failed) + return; + + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + /* If there is a padding machine info, this index is occupied. + * No need to check conditions for this index. */ + if (circ->padding_info[i]) + continue; + + /* We have a free machine index. Check the origin padding + * machines in reverse order, so that more recently added + * machines take priority over older ones. */ + SMARTLIST_FOREACH_REVERSE_BEGIN(origin_padding_machines, + circpad_machine_spec_t *, + machine) { + /* Machine definitions have a specific target machine index. + * This is so event ordering is deterministic with respect + * to which machine gets events first when there are two + * machines installed on a circuit. 
Make sure we only + * add this machine if its target machine index is free. */ + if (machine->machine_index == i && + circpad_machine_conditions_met(on_circ, machine)) { + + // We can only replace this machine if the target hopnum + // is the same, otherwise we'll get invalid data + if (circ->padding_machine[i]) { + if (circ->padding_machine[i]->target_hopnum != + machine->target_hopnum) + continue; + /* Replace it. (Don't free - is global). */ + circ->padding_machine[i] = NULL; + } + + /* Set up the machine immediately so that the slot is occupied. + * We will tear it down on error return, or if there is an error + * response from the relay. */ + circpad_setup_machine_on_circ(circ, machine); + if (circpad_negotiate_padding(on_circ, machine->machine_num, + machine->target_hopnum, + CIRCPAD_COMMAND_START) < 0) { + circpad_circuit_machineinfo_free_idx(circ, i); + circ->padding_machine[i] = NULL; + on_circ->padding_negotiation_failed = 1; + } else { + /* Success. Don't try any more machines */ + return; + } + } + } SMARTLIST_FOREACH_END(machine); + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Event that tells us we added a hop to an origin circuit. + * + * This event is used to decide if we should create a padding machine + * on a circuit. + */ +void +circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ) +{ + /* Since our padding conditions do not specify a max_hops, + * all we can do is add machines here */ + circpad_add_matching_machines(on_circ); +} + +/** + * Event that tells us that an origin circuit is now built. + * + * Shut down any machines that only applied to un-built circuits. + * Activate any new ones. + */ +void +circpad_machine_event_circ_built(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Circpad purpose changed event. + * + * Shut down any machines that don't apply to our circ purpose. + * Activate any new ones that do. 
+ */ +void +circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Event that tells us that an origin circuit is out of RELAY_EARLY + * cells. + * + * Shut down any machines that only applied to RELAY_EARLY circuits. + * Activate any new ones. + */ +void +circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Streams attached event. + * + * Called from link_apconn_to_circ() and handle_hs_exit_conn() + * + * Shut down any machines that only applied to machines without + * streams. Activate any new ones. + */ +void +circpad_machine_event_circ_has_streams(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Streams detached event. + * + * Called from circuit_detach_stream() + * + * Shut down any machines that only applied to machines without + * streams. Activate any new ones. + */ +void +circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Verify that padding is coming from the expected hop. + * + * Returns true if from_hop matches the target hop from + * one of our padding machines. + * + * Returns false if we're not an origin circuit, or if from_hop + * does not match one of the padding machines. + */ +bool +circpad_padding_is_from_expected_hop(circuit_t *circ, + crypt_path_t *from_hop) +{ + crypt_path_t *target_hop = NULL; + if (!CIRCUIT_IS_ORIGIN(circ)) + return 0; + + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + /* We have to check padding_machine and not padding_info/active + * machines here because padding may arrive after we shut down a + * machine. The info is gone, but the padding_machine waits + * for the padding_negotiated response to come back. 
*/ + if (!circ->padding_machine[i]) + continue; + + target_hop = circuit_get_cpath_hop(TO_ORIGIN_CIRCUIT(circ), + circ->padding_machine[i]->target_hopnum); + + if (target_hop == from_hop) + return 1; + } FOR_EACH_CIRCUIT_MACHINE_END; + + return 0; +} + +/** + * Deliver circpad events for an "unrecognized cell". + * + * Unrecognized cells are sent to relays and are forwarded + * onto the next hop of their circuits. Unrecognized cells + * are by definition not padding. We need to tell relay-side + * state machines that a non-padding cell was sent or received, + * depending on the direction, so they can update their histograms + * and decide to pad or not. + */ +void +circpad_deliver_unrecognized_cell_events(circuit_t *circ, + cell_direction_t dir) +{ + // We should never see unrecognized cells at origin. + // Our caller emits a warn when this happens. + if (CIRCUIT_IS_ORIGIN(circ)) { + return; + } + + if (dir == CELL_DIRECTION_OUT) { + /* When direction is out (away from origin), then we received non-padding + cell coming from the origin to us. */ + circpad_cell_event_nonpadding_received(circ); + } else if (dir == CELL_DIRECTION_IN) { + /* It's in and not origin, so the cell is going away from us. + * So we are relaying a non-padding cell towards the origin. */ + circpad_cell_event_nonpadding_sent(circ); + } +} + +/** + * Deliver circpad events for "recognized" relay cells. + * + * Recognized cells are destined for this hop, either client or middle. + * Check if this is a padding cell or not, and send the appropiate + * received event. + */ +void +circpad_deliver_recognized_relay_cell_events(circuit_t *circ, + uint8_t relay_command, + crypt_path_t *layer_hint) +{ + /* Padding negotiate cells are ignored by the state machines + * for simplicity. 
*/ + if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE || + relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) { + return; + } + + if (relay_command == RELAY_COMMAND_DROP) { + rep_hist_padding_count_read(PADDING_TYPE_DROP); + + if (CIRCUIT_IS_ORIGIN(circ)) { + if (circpad_padding_is_from_expected_hop(circ, layer_hint)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), 0); + } else { + /* This is unexpected padding. Ignore it for now. */ + return; + } + } + + /* The cell should be recognized by now, which means that we are on the + destination, which means that we received a padding cell. We might be + the client or the Middle node, still, because leaky-pipe. */ + circpad_cell_event_padding_received(circ); + log_fn(LOG_INFO, LD_CIRC, "Got padding cell on %s circuit %u.", + CIRCUIT_IS_ORIGIN(circ) ? "origin" : "non-origin", + CIRCUIT_IS_ORIGIN(circ) ? + TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0); + } else { + /* We received a non-padding cell on the edge */ + circpad_cell_event_nonpadding_received(circ); + } +} + +/** + * Deliver circpad events for relay cells sent from us. + * + * If this is a padding cell, update our padding stats + * and deliver the event. Otherwise just deliver the event. + */ +void +circpad_deliver_sent_relay_cell_events(circuit_t *circ, + uint8_t relay_command) +{ + /* Padding negotiate cells are ignored by the state machines + * for simplicity. */ + if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE || + relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) { + return; + } + + /* RELAY_COMMAND_DROP is the multi-hop (aka circuit-level) padding cell in + * tor. (CELL_PADDING is a channel-level padding cell, which is not relayed + * or processed here) */ + if (relay_command == RELAY_COMMAND_DROP) { + /* Optimization: The event for RELAY_COMMAND_DROP is sent directly + * from circpad_send_padding_cell_for_callback(). 
This is to avoid + * putting a cell_t and a relay_header_t on the stack repeatedly + * if we decide to send a long train of padidng cells back-to-back + * with 0 delay. So we do nothing here. */ + return; + } else { + /* This is a non-padding cell sent from the client or from + * this node. */ + circpad_cell_event_nonpadding_sent(circ); + } +} + +/** + * Initialize the states array for a circpad machine. + */ +void +circpad_machine_states_init(circpad_machine_spec_t *machine, + circpad_statenum_t num_states) +{ + if (BUG(num_states > CIRCPAD_MAX_MACHINE_STATES)) { + num_states = CIRCPAD_MAX_MACHINE_STATES; + } + + machine->num_states = num_states; + machine->states = tor_malloc_zero(sizeof(circpad_state_t)*num_states); + + /* Initialize the default next state for all events to + * "ignore" -- if events aren't specified, they are ignored. */ + for (circpad_statenum_t s = 0; s < num_states; s++) { + for (int e = 0; e < CIRCPAD_NUM_EVENTS; e++) { + machine->states[s].next_state[e] = CIRCPAD_STATE_IGNORE; + } + } +} + +static void +circpad_setup_machine_on_circ(circuit_t *on_circ, + const circpad_machine_spec_t *machine) +{ + if (CIRCUIT_IS_ORIGIN(on_circ) && !machine->is_origin_side) { + log_fn(LOG_WARN, LD_BUG, + "Can't set up non-origin machine on origin circuit!"); + return; + } + + if (!CIRCUIT_IS_ORIGIN(on_circ) && machine->is_origin_side) { + log_fn(LOG_WARN, LD_BUG, + "Can't set up origin machine on non-origin circuit!"); + return; + } + + tor_assert_nonfatal(on_circ->padding_machine[machine->machine_index] + == NULL); + tor_assert_nonfatal(on_circ->padding_info[machine->machine_index] == NULL); + + on_circ->padding_info[machine->machine_index] = + circpad_circuit_machineinfo_new(on_circ, machine->machine_index); + on_circ->padding_machine[machine->machine_index] = machine; +} + +/* These padding machines are only used for tests pending #28634. 
*/ +#ifdef TOR_UNIT_TESTS +static void +circpad_circ_client_machine_init(void) +{ + circpad_machine_spec_t *circ_client_machine + = tor_malloc_zero(sizeof(circpad_machine_spec_t)); + + // XXX: Better conditions for merge.. Or disable this machine in + // merge? + circ_client_machine->conditions.min_hops = 2; + circ_client_machine->conditions.state_mask = + CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED|CIRCPAD_CIRC_HAS_RELAY_EARLY; + circ_client_machine->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + + circ_client_machine->target_hopnum = 2; + circ_client_machine->is_origin_side = 1; + + /* Start, gap, burst */ + circpad_machine_states_init(circ_client_machine, 3); + + circ_client_machine->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + + /* If we are in burst state, and we send a non-padding cell, then we cancel + the timer for the next padding cell: + We dont want to send fake extends when actual extends are going on */ + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL; + + circ_client_machine->states[CIRCPAD_STATE_BURST]. 
+ next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END; + + circ_client_machine->states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_CLOSEST; + + // FIXME: Tune this histogram + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2; + circ_client_machine->states[CIRCPAD_STATE_BURST].start_usec = 500; + circ_client_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + /* We have 5 tokens in the histogram, which means that all circuits will look + * like they have 7 hops (since we start this machine after the second hop, + * and tokens are decremented for any valid hops, and fake extends are + * used after that -- 2+5==7). */ + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 5; + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 5; + + circ_client_machine->machine_num = smartlist_len(origin_padding_machines); + smartlist_add(origin_padding_machines, circ_client_machine); +} + +static void +circpad_circ_responder_machine_init(void) +{ + circpad_machine_spec_t *circ_responder_machine + = tor_malloc_zero(sizeof(circpad_machine_spec_t)); + + /* Shut down the machine after we've sent enough packets */ + circ_responder_machine->should_negotiate_end = 1; + + /* The relay-side doesn't care what hopnum it is, but for consistency, + * let's match the client */ + circ_responder_machine->target_hopnum = 2; + circ_responder_machine->is_origin_side = 0; + + /* Start, gap, burst */ + circpad_machine_states_init(circ_responder_machine, 3); + + /* This is the settings of the state machine. In the future we are gonna + serialize this into the consensus or the torrc */ + + /* We transition to the burst state on padding receive and on non-padding + * recieve */ + circ_responder_machine->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + circ_responder_machine->states[CIRCPAD_STATE_START]. 
+ next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + /* Inside the burst state we _stay_ in the burst state when a non-padding + * is sent */ + circ_responder_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_BURST; + + /* Inside the burst state we transition to the gap state when we receive a + * padding cell */ + circ_responder_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP; + + /* These describe the padding charasteristics when in burst state */ + + /* use_rtt_estimate tries to estimate how long padding cells take to go from + C->M, and uses that as what as the base of the histogram */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1; + /* The histogram is 2 bins: an empty one, and infinity */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2; + circ_responder_machine->states[CIRCPAD_STATE_BURST].start_usec = 5000; + circ_responder_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + /* During burst state we wait forever for padding to arrive. + + We are waiting for a padding cell from the client to come in, so that we + respond, and we immitate how extend looks like */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 0; + // Only infinity bin: + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[1] = 1; + circ_responder_machine->states[CIRCPAD_STATE_BURST]. + histogram_total_tokens = 1; + + /* From the gap state, we _stay_ in the gap state, when we receive padding + * or non padding */ + circ_responder_machine->states[CIRCPAD_STATE_GAP]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP; + circ_responder_machine->states[CIRCPAD_STATE_GAP]. 
+ next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_GAP; + + /* And from the gap state, we go to the end, when the bins are empty or a + * non-padding cell is sent */ + circ_responder_machine->states[CIRCPAD_STATE_GAP]. + next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END; + circ_responder_machine->states[CIRCPAD_STATE_GAP]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_END; + + // FIXME: Tune this histogram + + /* The gap state is the delay you wait after you receive a padding cell + before you send a padding response */ + circ_responder_machine->states[CIRCPAD_STATE_GAP].use_rtt_estimate = 1; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_len = 6; + circ_responder_machine->states[CIRCPAD_STATE_GAP].start_usec = 5000; + circ_responder_machine->states[CIRCPAD_STATE_GAP].range_usec = 1000000; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[0] = 0; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[1] = 1; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[2] = 2; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[3] = 2; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[4] = 1; + /* Total number of tokens */ + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_total_tokens = 6; + circ_responder_machine->states[CIRCPAD_STATE_GAP].token_removal = + CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC; + + circ_responder_machine->machine_num = smartlist_len(relay_padding_machines); + smartlist_add(relay_padding_machines, circ_responder_machine); +} +#endif + +/** + * Initialize all of our padding machines. + * + * This is called at startup. It sets up some global machines, and then + * loads some from torrc, and from the tor consensus. 
+ */ +void +circpad_machines_init(void) +{ + tor_assert_nonfatal(origin_padding_machines == NULL); + tor_assert_nonfatal(relay_padding_machines == NULL); + + origin_padding_machines = smartlist_new(); + relay_padding_machines = smartlist_new(); + + // TODO: Parse machines from consensus and torrc +#ifdef TOR_UNIT_TESTS + circpad_circ_client_machine_init(); + circpad_circ_responder_machine_init(); +#endif +} + +/** + * Free our padding machines + */ +void +circpad_machines_free(void) +{ + if (origin_padding_machines) { + SMARTLIST_FOREACH(origin_padding_machines, + circpad_machine_spec_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(origin_padding_machines); + } + + if (relay_padding_machines) { + SMARTLIST_FOREACH(relay_padding_machines, + circpad_machine_spec_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(relay_padding_machines); + } +} + +/** + * Check the Protover info to see if a node supports padding. + */ +static bool +circpad_node_supports_padding(const node_t *node) +{ + if (node->rs) { + log_fn(LOG_INFO, LD_CIRC, "Checking padding: %s", + node->rs->pv.supports_padding ? "supported" : "unsupported"); + return node->rs->pv.supports_padding; + } + + log_fn(LOG_INFO, LD_CIRC, "Empty routerstatus in padding check"); + return 0; +} + +/** + * Get a node_t for the nth hop in our circuit, starting from 1. + * + * Returns node_t from the consensus for that hop, if it is opened. + * Otherwise returns NULL. + */ +static const node_t * +circuit_get_nth_node(origin_circuit_t *circ, int hop) +{ + crypt_path_t *iter = circuit_get_cpath_hop(circ, hop); + + if (!iter || iter->state != CPATH_STATE_OPEN) + return NULL; + + return node_get_by_id(iter->extend_info->identity_digest); +} + +/** + * Return true if a particular circuit supports padding + * at the desired hop. 
+ */ +static bool +circpad_circuit_supports_padding(origin_circuit_t *circ, + int target_hopnum) +{ + const node_t *hop; + + if (!(hop = circuit_get_nth_node(circ, target_hopnum))) { + return 0; + } + + return circpad_node_supports_padding(hop); +} + +/** + * Try to negotiate padding. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_negotiate_padding(origin_circuit_t *circ, + circpad_machine_num_t machine, + uint8_t target_hopnum, + uint8_t command) +{ + circpad_negotiate_t type; + cell_t cell; + ssize_t len; + + /* Check that the target hop lists support for padding in + * its ProtoVer fields */ + if (!circpad_circuit_supports_padding(circ, target_hopnum)) { + return -1; + } + + memset(&cell, 0, sizeof(cell_t)); + memset(&type, 0, sizeof(circpad_negotiate_t)); + // This gets reset to RELAY_EARLY appropriately by + // relay_send_command_from_edge_. At least, it looks that way. + // QQQ-MP-AP: Verify that. + cell.command = CELL_RELAY; + + circpad_negotiate_set_command(&type, command); + circpad_negotiate_set_version(&type, 0); + circpad_negotiate_set_machine_type(&type, machine); + + if ((len = circpad_negotiate_encode(cell.payload, CELL_PAYLOAD_SIZE, + &type)) < 0) + return -1; + + log_fn(LOG_INFO,LD_CIRC, "Negotiating padding on circuit %u", + circ->global_identifier); + + return circpad_send_command_to_hop(circ, target_hopnum, + RELAY_COMMAND_PADDING_NEGOTIATE, + cell.payload, len); +} + +/** + * Try to negotiate padding. + * + * Returns 1 if successful (or already set up), 0 otherwise. + */ +bool +circpad_padding_negotiated(circuit_t *circ, + circpad_machine_num_t machine, + uint8_t command, + uint8_t response) +{ + circpad_negotiated_t type; + cell_t cell; + ssize_t len; + + memset(&cell, 0, sizeof(cell_t)); + memset(&type, 0, sizeof(circpad_negotiated_t)); + // This gets reset to RELAY_EARLY appropriately by + // relay_send_command_from_edge_. At least, it looks that way. + // QQQ-MP-AP: Verify that. 
+ cell.command = CELL_RELAY; + + circpad_negotiated_set_command(&type, command); + circpad_negotiated_set_response(&type, response); + circpad_negotiated_set_version(&type, 0); + circpad_negotiated_set_machine_type(&type, machine); + + if ((len = circpad_negotiated_encode(cell.payload, CELL_PAYLOAD_SIZE, + &type)) < 0) + return 0; + + /* Use relay_send because we're from the middle to the origin. We don't + * need to specify a target hop or layer_hint. */ + return relay_send_command_from_edge(0, circ, + RELAY_COMMAND_PADDING_NEGOTIATED, + (void*)cell.payload, + (size_t)len, NULL) == 0; +} + +/** + * Parse and react to a padding_negotiate cell. + * + * This is called at the middle node upon receipt of the client's choice of + * state machine, so that it can use the requested state machine index, if + * it is available. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) +{ + int retval = 0; + circpad_negotiate_t *negotiate; + + if (CIRCUIT_IS_ORIGIN(circ)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiate cell unsupported at origin."); + return -1; + } + + if (circpad_negotiate_parse(&negotiate, cell->payload+RELAY_HEADER_SIZE, + CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) { + log_fn(LOG_WARN, LD_CIRC, + "Received malformed PADDING_NEGOTIATE cell; dropping."); + return -1; + } + + if (negotiate->command == CIRCPAD_COMMAND_STOP) { + /* Free the machine corresponding to this machine type */ + free_circ_machineinfos_with_machine_num(circ, negotiate->machine_type); + log_fn(LOG_WARN, LD_CIRC, + "Received circuit padding stop command for unknown machine."); + goto err; + } else if (negotiate->command == CIRCPAD_COMMAND_START) { + SMARTLIST_FOREACH_BEGIN(relay_padding_machines, + const circpad_machine_spec_t *, m) { + if (m->machine_num == negotiate->machine_type) { + circpad_setup_machine_on_circ(circ, m); + goto done; + } + } SMARTLIST_FOREACH_END(m); + } + + err: + retval = -1; + + done: + 
circpad_padding_negotiated(circ, negotiate->machine_type, + negotiate->command, + (retval == 0) ? CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR); + circpad_negotiate_free(negotiate); + + return retval; +} + +/** + * Parse and react to a padding_negotiated cell. + * + * This is called at the origin upon receipt of the middle's response + * to our choice of state machine. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell, + crypt_path_t *layer_hint) +{ + circpad_negotiated_t *negotiated; + + if (!CIRCUIT_IS_ORIGIN(circ)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiated cell unsupported at non-origin."); + return -1; + } + + /* Verify this came from the expected hop */ + if (!circpad_padding_is_from_expected_hop(circ, layer_hint)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiated cell from wrong hop!"); + return -1; + } + + if (circpad_negotiated_parse(&negotiated, cell->payload+RELAY_HEADER_SIZE, + CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) { + log_fn(LOG_WARN, LD_CIRC, + "Received malformed PADDING_NEGOTIATED cell; " + "dropping."); + return -1; + } + + if (negotiated->command == CIRCPAD_COMMAND_STOP) { + /* There may not be a padding_info here if we shut down the + * machine in circpad_shutdown_old_machines(). Or, if + * circpad_add_matching_matchines() added a new machine, + * there may be a padding_machine for a different machine num + * than this response. */ + free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type); + } else if (negotiated->command == CIRCPAD_COMMAND_START && + negotiated->response == CIRCPAD_RESPONSE_ERR) { + // This can happen due to consensus drift.. 
free the machines + // and be sad + free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type); + TO_ORIGIN_CIRCUIT(circ)->padding_negotiation_failed = 1; + log_fn(LOG_INFO, LD_CIRC, + "Middle node did not accept our padding request."); + } + + circpad_negotiated_free(negotiated); + return 0; +} + +/* Serialization */ +// TODO: Should we use keyword=value here? Are there helpers for that? +#if 0 +static void +circpad_state_serialize(const circpad_state_t *state, + smartlist_t *chunks) +{ + smartlist_add_asprintf(chunks, " %u", state->histogram[0]); + for (int i = 1; i < state->histogram_len; i++) { + smartlist_add_asprintf(chunks, ",%u", + state->histogram[i]); + } + + smartlist_add_asprintf(chunks, " 0x%x", + state->transition_cancel_events); + + for (int i = 0; i < CIRCPAD_NUM_STATES; i++) { + smartlist_add_asprintf(chunks, ",0x%x", + state->transition_events[i]); + } + + smartlist_add_asprintf(chunks, " %u %u", + state->use_rtt_estimate, + state->token_removal); +} + +char * +circpad_machine_spec_to_string(const circpad_machine_spec_t *machine) +{ + smartlist_t *chunks = smartlist_new(); + char *out; + (void)machine; + + circpad_state_serialize(&machine->start, chunks); + circpad_state_serialize(&machine->gap, chunks); + circpad_state_serialize(&machine->burst, chunks); + + out = smartlist_join_strings(chunks, "", 0, NULL); + + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + return out; +} + +// XXX: Writeme +const circpad_machine_spec_t * +circpad_string_to_machine(const char *str) +{ + (void)str; + return NULL; +} + +#endif diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h new file mode 100644 index 0000000000..628f27ec11 --- /dev/null +++ b/src/core/or/circuitpadding.h @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2017, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file circuitpadding.h + * \brief Header file for circuitpadding.c. 
 **/

#ifndef TOR_CIRCUITPADDING_H
#define TOR_CIRCUITPADDING_H

#include "src/trunnel/circpad_negotiation.h"
#include "lib/evloop/timers.h"

struct circuit_t;
struct origin_circuit_t;
struct cell_t;

/**
 * Signed error return with the specific property that negative
 * values mean error codes of various semantics, 0 means success,
 * and positive values are unused.
 *
 * XXX: Tor uses this concept a lot but just calls it int. Should we move
 * this somewhere centralized? Where?
 */
typedef int signed_error_t;

/**
 * These constants specify the types of events that can cause
 * transitions between state machine states.
 *
 * Note that SENT and RECV are relative to this endpoint. For
 * relays, SENT means packets destined towards the client and
 * RECV means packets destined towards the relay. On the client,
 * SENT means packets destined towards the relay, whereas RECV
 * means packets destined towards the client.
 */
typedef enum {
  /* A non-padding cell was received. */
  CIRCPAD_EVENT_NONPADDING_RECV = 0,
  /* A non-padding cell was sent. */
  CIRCPAD_EVENT_NONPADDING_SENT = 1,
  /* A padding cell (RELAY_COMMAND_DROP) was sent. */
  CIRCPAD_EVENT_PADDING_SENT = 2,
  /* A padding cell was received. */
  CIRCPAD_EVENT_PADDING_RECV = 3,
  /* We tried to schedule padding but we ended up picking the infinity bin
   * which means that padding was delayed infinitely */
  CIRCPAD_EVENT_INFINITY = 4,
  /* All histogram bins are empty (we are out of tokens) */
  CIRCPAD_EVENT_BINS_EMPTY = 5,
  /* The current state has used up its cell count. Despite the name this
   * is a real event (circpad_internal_event_state_length_up() emits it),
   * not a count of the events above; CIRCPAD_NUM_EVENTS is the count. */
  CIRCPAD_EVENT_LENGTH_COUNT = 6
} circpad_event_t;
/* Number of events: the last event value plus one. */
#define CIRCPAD_NUM_EVENTS ((int)CIRCPAD_EVENT_LENGTH_COUNT+1)

/** Boolean type that says if we decided to transition states or not */
typedef enum {
  CIRCPAD_STATE_UNCHANGED = 0,
  CIRCPAD_STATE_CHANGED = 1
} circpad_decision_t;

/** The type for the things in histogram bins (aka tokens) */
typedef uint32_t circpad_hist_token_t;

/** The type for histogram indexes (signed, so that negative values can
 * be used for errors) */
typedef int8_t circpad_hist_index_t;

/** The type for absolute time, from monotime_absolute_usec() */
typedef uint64_t circpad_time_t;

/** The type for timer delays, in microseconds */
typedef uint32_t circpad_delay_t;

/**
 * An infinite padding cell delay means don't schedule any padding --
 * simply wait until a different event triggers a transition.
 *
 * This means that the maximum delay we can schedule is UINT32_MAX-1
 * microseconds, or about 4295 seconds (roughly 1.2 hours).
 * XXX: Is this enough if we want to simulate light, intermittent
 * activity on an onion service?
 */
#define CIRCPAD_DELAY_INFINITE  (UINT32_MAX)

/**
 * Macro to clarify when we're checking the infinity bin.
 * The infinity bin is the last bin: index histogram_len-1.
 *
 * Works with either circpad_state_t or circpad_machine_state_t
 */
#define CIRCPAD_INFINITY_BIN(mi)  ((mi)->histogram_len-1)

/**
 * These constants form a bitfield that specifies when a state machine
 * should be applied to a circuit.
 *
 * If any of these elements is set, then the circuit will be tested against
 * that specific condition. If an element is unset, then we don't test it.
 * (E.g. If neither NO_STREAMS or STREAMS are set, then we will not care
 * whether a circuit has streams attached when we apply a state machine)
 *
 * The helper function circpad_circuit_state() converts circuit state
 * flags into this more compact representation.
 */
typedef enum {
  /* Only apply machine if the circuit is still building */
  CIRCPAD_CIRC_BUILDING = 1<<0,
  /* Only apply machine if the circuit is open */
  CIRCPAD_CIRC_OPENED = 1<<1,
  /* Only apply machine if the circuit has no attached streams */
  CIRCPAD_CIRC_NO_STREAMS = 1<<2,
  /* Only apply machine if the circuit has attached streams */
  CIRCPAD_CIRC_STREAMS = 1<<3,
  /* Only apply machine if the circuit still allows RELAY_EARLY cells */
  CIRCPAD_CIRC_HAS_RELAY_EARLY = 1<<4,
  /* Only apply machine if the circuit has depleted its RELAY_EARLY cells
   * allowance. */
  CIRCPAD_CIRC_HAS_NO_RELAY_EARLY = 1<<5
} circpad_circuit_state_t;

/** Bitmask that says "apply this machine to all states" */
#define CIRCPAD_STATE_ALL   \
    (CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED| \
     CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_NO_STREAMS| \
     CIRCPAD_CIRC_HAS_RELAY_EARLY|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY)

/**
 * A compact circuit purpose bitfield mask that allows us to compactly
 * specify which circuit purposes a machine should apply to.
 *
 * The helper function circpad_circ_purpose_to_mask() converts circuit
 * purposes into bit positions in this bitmask.
 */
typedef uint32_t circpad_purpose_mask_t;

/** Bitmask that says "apply this machine to all purposes". */
#define CIRCPAD_PURPOSE_ALL (0xFFFFFFFF)

/**
 * This type specifies all of the conditions that must be met before
 * a client decides to initiate padding on a circuit.
 *
 * A circuit must satisfy every sub-field in this type in order
 * to be considered to match the conditions.
 */
typedef struct circpad_machine_conditions_t {
  /** Only apply the machine *if* the circuit has at least this many hops.
   * (A 3-bit field, so the largest value it can hold is 7.) */
  unsigned min_hops : 3;

  /** Only apply the machine *if* vanguards are enabled */
  unsigned requires_vanguards : 1;

  /** Only apply the machine *if* the circuit's state matches any of
   *  the bits set in this bitmask. */
  circpad_circuit_state_t state_mask;

  /** Only apply a machine *if* the circuit's purpose matches one
   *  of the bits set in this bitmask */
  circpad_purpose_mask_t purpose_mask;

} circpad_machine_conditions_t;

/**
 * Token removal strategy options.
 *
 * The WTF-PAD histograms are meant to specify a target distribution to shape
 * traffic towards. This is accomplished by removing tokens from the histogram
 * when either padding or non-padding cells are sent.
 *
 * When we see a non-padding cell at a particular time since the last cell, we
 * remove a token from the corresponding delay bin. These flags specify
 * which bin to choose if that bin is already empty.
 */
typedef enum {
  /** Don't remove any tokens */
  CIRCPAD_TOKEN_REMOVAL_NONE = 0,
  /**
   * Remove from the first non-zero higher bin index when current is zero.
   * This is the recommended strategy from the Adaptive Padding paper. */
  CIRCPAD_TOKEN_REMOVAL_HIGHER = 1,
  /** Remove from the first non-zero lower bin index when current is empty. */
  CIRCPAD_TOKEN_REMOVAL_LOWER = 2,
  /** Remove from the closest non-zero bin index when current is empty. */
  CIRCPAD_TOKEN_REMOVAL_CLOSEST = 3,
  /** Remove from the closest bin by time value (since bins are
   *  exponentially spaced). */
  CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC = 4,
  /** Only remove from the exact bin corresponding to this delay. If
   *  the bin is 0, simply do nothing. Don't pick another bin. */
  CIRCPAD_TOKEN_REMOVAL_EXACT = 5
} circpad_removal_t;

/**
 * Distribution types supported by circpad_distribution_sample().
 *
 * These can be used instead of histograms for the inter-packet
 * timing distribution, or to specify a distribution on the number
 * of cells that can be sent while in a specific state of the state
 * machine. */
typedef enum {
  CIRCPAD_DIST_NONE = 0,
  CIRCPAD_DIST_UNIFORM = 1,
  CIRCPAD_DIST_LOGISTIC = 2,
  CIRCPAD_DIST_LOG_LOGISTIC = 3,
  CIRCPAD_DIST_GEOMETRIC = 4,
  CIRCPAD_DIST_WEIBULL = 5,
  CIRCPAD_DIST_PARETO = 6
} circpad_distribution_type_t;

/**
 * Distribution information.
 *
 * This type specifies a specific distribution above, as well as
 * up to two parameters for that distribution. The specific
 * per-distribution meaning of these parameters is specified
 * in circpad_distribution_sample().
 */
typedef struct circpad_distribution_t {
  circpad_distribution_type_t type;
  double param1;
  double param2;
} circpad_distribution_t;

/** State number type. Represents current state of state machine. */
typedef uint16_t circpad_statenum_t;
#define  CIRCPAD_STATENUM_MAX   (UINT16_MAX)

/** A histogram is used to sample padding delays given a machine state. This
 * constant defines the maximum histogram width (i.e. the max number of bins):
 * one bin per bit of circpad_delay_t, plus one.
 *
 * Each histogram bin is twice as large as the previous. Two exceptions: The
 * first bin has zero width (which means that minimum delay is applied to the
 * next padding cell), and the last bin (infinity bin) has infinite width
 * (which means that the next padding cell will be delayed infinitely). */
#define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1)

/**
 * A state of a padding state machine. The information here are immutable and
 * represent the initial form of the state; it does not get updated as things
 * happen. The mutable information that gets updated in runtime are carried in
 * a circpad_machine_state_t.
 *
 * This struct describes the histograms and parameters of a single
 * state in the adaptive padding machine.
Instances of this struct + * exist in global circpad machine definitions that come from torrc + * or the consensus. + */ +typedef struct circpad_state_t { + /** If a histogram is used for this state, this specifies the number of bins + * of this histogram. Histograms must have at least 2 bins. + * + * If a delay probability distribution is used for this state, this is set + * to 0. */ + circpad_hist_index_t histogram_len; + /** The histogram itself: an array of per-bin token counts + * (circpad_hist_token_t), whose bin widths are exponentially spaced, + * in microseconds */ + circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN]; + /** Total number of tokens in this histogram. This is a constant and is *not* + * decremented every time we spend a token. It's used for initializing and + * refilling the histogram. */ + uint32_t histogram_total_tokens; + + /** Minimum padding delay of this state in microseconds. + * + * If histograms are used, this is the left (and right) bound of the first + * bin (since it has zero width). + * + * If a delay probability distribution is used, this represents the minimum + * delay we can sample from the distribution. + */ + circpad_delay_t start_usec; + + /** If histograms are used, this is the width of the whole histogram in + * microseconds, and it's used to calculate individual bin width. + * + * If a delay probability distribution is used, this is used as the max + * delay we can sample from the distribution. + */ + circpad_delay_t range_usec; + + /** + * Represents a delay probability distribution (aka IAT distribution). It's a + * parametrized way of encoding inter-packet delay information in + * microseconds. It can be used instead of histograms. + * + * If it is used, token_removal below must be set to + * CIRCPAD_TOKEN_REMOVAL_NONE. + * + * start_usec, range_usec, and the RTT estimate are still applied to the + * results of sampling from this distribution (range_usec is used as a max). 
+ */ + circpad_distribution_t iat_dist; + + /** + * The length dist is a parameterized way of encoding how long this + * state machine runs in terms of sent padding cells or all + * sent cells. Values are sampled from this distribution, clamped + * to max_length, and then start_length is added to that value. + * + * It may be specified instead of or in addition to + * the infinity bins and bins empty conditions. */ + circpad_distribution_t length_dist; + /** A minimum length value, added to the output of length_dist */ + uint16_t start_length; + /** A cap on the length value that can be sampled from the length_dist */ + uint64_t max_length; + + /** Should we decrement length when we see a nonpadding packet? + * XXX: Are there any machines that actually want to set this to 0? There may + * not be. OTOH, it's only a bit.. */ + unsigned length_includes_nonpadding : 1; + + /** + * This is an array that specifies the next state to transition to upon + * receipt of an event matching the indicated array index. + * + * This aborts our scheduled packet and switches to the state + * stored at that index of the array. Tokens are filled upon + * this transition. + * + * States are allowed to transition to themselves, which means re-schedule + * a new padding timer. They are also allowed to temporarily "transition" + * to the "IGNORE" and "CANCEL" pseudo-states. See #defines below + * for details on state behavior and meaning. + */ + circpad_statenum_t next_state[CIRCPAD_NUM_EVENTS]; + + /** + * If true, estimate the RTT from this relay to the exit/website and add that + * to start_usec for use as the histogram bin 0 start delay. + * + * Right now this is only supported for relay-side state machines. + */ + unsigned use_rtt_estimate : 1; + + /** This specifies the token removal strategy to use upon padding and + * non-padding activity. */ + circpad_removal_t token_removal; +} circpad_state_t; + +/** + * The start state for this machine. 
+ * + * In the original WTF-PAD, this is only used for transition to/from + * the burst state. All other fields are not used. But to simplify the + * code we've made it a first-class state. This has no performance + * consequences, but may make naive serialization of the state machine + * large, if we're not careful about how we represent empty fields. + */ +#define CIRCPAD_STATE_START 0 + +/** + * The burst state for this machine. + * + * In the original Adaptive Padding algorithm and in WTF-PAD + * (https://www.freehaven.net/anonbib/cache/ShWa-Timing06.pdf and + * https://www.cs.kau.se/pulls/hot/thebasketcase-wtfpad/), the burst + * state serves to detect bursts in traffic. This is done by using longer + * delays in its histogram, which represent the expected delays between + * bursts of packets in the target stream. If this delay expires without a + * real packet being sent, the burst state sends a padding packet and then + * immediately transitions to the gap state, which is used to generate + * a synthetic padding packet train. In this implementation, this transition + * needs to be explicitly specified in the burst state's transition events. + * + * Because of this flexibility, other padding mechanisms can transition + * between these two states arbitrarily, to encode other dynamics of + * target traffic. + */ +#define CIRCPAD_STATE_BURST 1 + +/** + * The gap state for this machine. + * + * In the original Adaptive Padding algorithm and in WTF-PAD, the gap + * state serves to simulate an artificial packet train composed of padding + * packets. It does this by specifying much lower inter-packet delays than + * the burst state, and transitioning back to itself after padding is sent + * if these timers expire before real traffic is sent. If real traffic is + * sent, it transitions back to the burst state. + * + * Again, in this implementation, these transitions must be specified + * explicitly, and other transitions are also permitted. 
+ */ +#define CIRCPAD_STATE_GAP 2 + +/** + * End is a pseudo-state that causes the machine to go completely + * idle, and optionally get torn down (depending on the + * value of circpad_machine_spec_t.should_negotiate_end) + * + * End MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_END CIRCPAD_STATENUM_MAX + +/** + * "Ignore" is a pseudo-state that means "do not react to this + * event". + * + * "Ignore" MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_IGNORE (CIRCPAD_STATENUM_MAX-1) + +/** + * "Cancel" is a pseudo-state that means "cancel pending timers, + * but remain in your current state". + * + * Cancel MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_CANCEL (CIRCPAD_STATENUM_MAX-2) + +/** + * Since we have 3 pseudo-states, the max state array length is + * up to one less than cancel's statenum. + */ +#define CIRCPAD_MAX_MACHINE_STATES (CIRCPAD_STATE_CANCEL-1) + +/** + * Mutable padding machine info. + * + * This structure contains mutable information about a padding + * machine. The mutable information must be kept separate because + * it exists per-circuit, where as the machines themselves are global. + * This separation is done to conserve space in the circuit structure. + * + * This is the per-circuit state that changes regarding the global state + * machine. Some parts of it are optional (ie NULL). + * + * XXX: Play with layout to minimize space on x64 Linux (most common relay). + */ +typedef struct circpad_machine_state_t { + /** The callback pointer for the padding callbacks. + * + * These timers stick around the machineinfo until the machineinfo's circuit + * is closed, at which point the timer is cancelled. For this reason it's + * safe to assume that the machineinfo exists if this timer gets + * triggered. 
*/ + tor_timer_t *padding_timer; + + /** The circuit for this machine */ + struct circuit_t *on_circ; + + /** A mutable copy of the histogram for the current state. + * NULL if token removal is disabled for that state + * (NOTE(review): presumably token_removal == CIRCPAD_TOKEN_REMOVAL_NONE; + * circpad_state_t has no field named remove_tokens -- confirm) */ + circpad_hist_token_t *histogram; + /** Length of the above histogram. + * XXX: This field *could* be removed at the expense of added + * complexity+overhead for reaching back into the immutable machine + * state every time we need to inspect the histogram. It's only a byte, + * though, so it seemed worth it. + */ + circpad_hist_index_t histogram_len; + /** Remove token from this index upon sending padding */ + circpad_hist_index_t chosen_bin; + + /** Stop padding/transition if this many cells sent */ + uint64_t state_length; +#define CIRCPAD_STATE_LENGTH_INFINITE UINT64_MAX + + /** A scaled count of padding packets sent, used to limit padding overhead. + * When this reaches UINT16_MAX, we cut it and nonpadding_sent in half. */ + uint16_t padding_sent; + /** A scaled count of non-padding packets sent, used to limit padding + * overhead. When this reaches UINT16_MAX, we cut it and padding_sent in + * half. */ + uint16_t nonpadding_sent; + + /** + * EWMA estimate of the RTT of the circuit from this hop + * to the exit end, in microseconds. */ + circpad_delay_t rtt_estimate_usec; + + /** + * The last time we got an event relevant to estimating + * the RTT. Monotonic time in microseconds since system + * start. + */ + circpad_time_t last_received_time_usec; + + /** + * The time at which we scheduled a non-padding packet, + * or selected an infinite delay. + * + * Monotonic time in microseconds since system start. + * This is 0 if we haven't chosen a padding delay. + */ + circpad_time_t padding_scheduled_at_usec; + + /** What state is this machine in? */ + circpad_statenum_t current_state; + + /** + * True if we have scheduled a timer for padding. + * + * This is 1 if a timer is pending. It is 0 if + * no timer is scheduled. 
(It can be 0 even when + * padding_scheduled_at_usec is non-zero). + */ + unsigned is_padding_timer_scheduled : 1; + + /** + * If this is true, we have seen full duplex behavior. + * Stop updating the RTT. + */ + unsigned stop_rtt_update : 1; + +/** Max number of padding machines on each circuit. If changed, + * also ensure the machine_index bitwidth supports the new size. */ +#define CIRCPAD_MAX_MACHINES (2) + /** Which padding machine index was this for. + * (make sure changes to the bitwidth can support the + * CIRCPAD_MAX_MACHINES define). */ + unsigned machine_index : 1; + +} circpad_machine_state_t; + +/** Helper macro to get an actual state machine from a machineinfo */ +#define CIRCPAD_GET_MACHINE(machineinfo) \ + ((machineinfo)->on_circ->padding_machine[(machineinfo)->machine_index]) + +/** + * This specifies a particular padding machine to use after negotiation. + * + * The constants for machine_num_t are in trunnel. + * We want to be able to define extra numbers in the consensus/torrc, though. + */ +typedef uint8_t circpad_machine_num_t; + +/** Global state machine structure from the consensus */ +typedef struct circpad_machine_spec_t { + /** Global machine number */ + circpad_machine_num_t machine_num; + + /** Which machine index slot should this machine go into in + * the array on the circuit_t */ + unsigned machine_index : 1; + + /** Send a padding negotiate to shut down machine at end state? */ + unsigned should_negotiate_end : 1; + + // These next three fields are origin machine-only... + /** Origin side or relay side */ + unsigned is_origin_side : 1; + + /** Which hop in the circuit should we send padding to/from? + * 1-indexed (ie: hop #1 is guard, #2 middle, #3 exit). */ + unsigned target_hopnum : 3; + + /** This machine is only applied to a circuit if the following conditions + * are met. */ + circpad_machine_conditions_t conditions; + + /** How many padding cells can be sent before we apply overhead limits? 
+ * XXX: Note that we can only allow up to 64k of padding cells on an + * otherwise quiet circuit. Is this enough? It's 33MB. */ + uint16_t allowed_padding_count; + + /** Padding percent cap: Stop padding if we exceed this percent overhead. + * 0 means no limit. Overhead is defined as percent of total traffic, so + * that we can use 0..100 here. This is the same definition as used in + * Prop#265. */ + uint8_t max_padding_percent; + + /** State array: indexed by circpad_statenum_t */ + circpad_state_t *states; + + /** + * Number of states this machine has (ie: length of the states array). + * XXX: This field is not needed other than for safety. */ + circpad_statenum_t num_states; +} circpad_machine_spec_t; + +void circpad_new_consensus_params(const networkstatus_t *ns); + +/** + * The following are event call-in points that are of interest to + * the state machines. They are called during cell processing. */ +void circpad_deliver_unrecognized_cell_events(struct circuit_t *circ, + cell_direction_t dir); +void circpad_deliver_sent_relay_cell_events(struct circuit_t *circ, + uint8_t relay_command); +void circpad_deliver_recognized_relay_cell_events(struct circuit_t *circ, + uint8_t relay_command, + crypt_path_t *layer_hint); + +/** Cell events are delivered by the above delivery functions */ +void circpad_cell_event_nonpadding_sent(struct circuit_t *on_circ); +void circpad_cell_event_nonpadding_received(struct circuit_t *on_circ); +void circpad_cell_event_padding_sent(struct circuit_t *on_circ); +void circpad_cell_event_padding_received(struct circuit_t *on_circ); + +/** Internal events are events the machines send to themselves */ +circpad_decision_t +circpad_internal_event_infinity(circpad_machine_state_t *mi); +circpad_decision_t +circpad_internal_event_bins_empty(circpad_machine_state_t *); +circpad_decision_t circpad_internal_event_state_length_up( + circpad_machine_state_t *); + +/** Machine creation events are events that cause us to set up or + * tear down 
padding state machines. */ +void circpad_machine_event_circ_added_hop(struct origin_circuit_t *on_circ); +void circpad_machine_event_circ_built(struct origin_circuit_t *circ); +void circpad_machine_event_circ_purpose_changed(struct origin_circuit_t *circ); +void circpad_machine_event_circ_has_streams(struct origin_circuit_t *circ); +void circpad_machine_event_circ_has_no_streams(struct origin_circuit_t *circ); +void +circpad_machine_event_circ_has_no_relay_early(struct origin_circuit_t *circ); + +void circpad_machines_init(void); +void circpad_machines_free(void); + +void circpad_machine_states_init(circpad_machine_spec_t *machine, + circpad_statenum_t num_states); + +void circpad_circuit_free_all_machineinfos(struct circuit_t *circ); + +bool circpad_padding_is_from_expected_hop(struct circuit_t *circ, + crypt_path_t *from_hop); + +/** Serialization functions for writing to/from torrc and consensus */ +char *circpad_machine_spec_to_string(const circpad_machine_spec_t *machine); +const circpad_machine_spec_t *circpad_string_to_machine(const char *str); + +/* Padding negotiation between client and middle */ +signed_error_t circpad_handle_padding_negotiate(struct circuit_t *circ, + struct cell_t *cell); +signed_error_t circpad_handle_padding_negotiated(struct circuit_t *circ, + struct cell_t *cell, + crypt_path_t *layer_hint); +signed_error_t circpad_negotiate_padding(struct origin_circuit_t *circ, + circpad_machine_num_t machine, + uint8_t target_hopnum, + uint8_t command); +bool circpad_padding_negotiated(struct circuit_t *circ, + circpad_machine_num_t machine, + uint8_t command, + uint8_t response); + +MOCK_DECL(circpad_decision_t, +circpad_machine_schedule_padding,(circpad_machine_state_t *)); + +MOCK_DECL(circpad_decision_t, +circpad_machine_spec_transition, (circpad_machine_state_t *mi, + circpad_event_t event)); + +circpad_decision_t circpad_send_padding_cell_for_callback( + circpad_machine_state_t *mi); + +#ifdef CIRCUITPADDING_PRIVATE +STATIC circpad_delay_t 
+circpad_machine_sample_delay(circpad_machine_state_t *mi); + +STATIC bool +circpad_machine_reached_padding_limit(circpad_machine_state_t *mi); + +STATIC +circpad_decision_t circpad_machine_remove_token(circpad_machine_state_t *mi); + +STATIC circpad_delay_t +circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi, + circpad_hist_index_t bin); + +STATIC const circpad_state_t * +circpad_machine_current_state(const circpad_machine_state_t *mi); + +STATIC circpad_hist_index_t circpad_histogram_usec_to_bin( + const circpad_machine_state_t *mi, + circpad_delay_t us); + +STATIC circpad_machine_state_t *circpad_circuit_machineinfo_new( + struct circuit_t *on_circ, + int machine_index); +STATIC void circpad_machine_remove_higher_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_us); +STATIC void circpad_machine_remove_lower_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_us); +STATIC void circpad_machine_remove_closest_token(circpad_machine_state_t *mi, + circpad_delay_t target_bin_us, + bool use_usec); +STATIC void circpad_machine_setup_tokens(circpad_machine_state_t *mi); + +MOCK_DECL(STATIC signed_error_t, +circpad_send_command_to_hop,(struct origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len)); + +#ifdef TOR_UNIT_TESTS +extern smartlist_t *origin_padding_machines; +extern smartlist_t *relay_padding_machines; +#endif + +#endif + +#endif diff --git a/src/core/or/circuituse.c b/src/core/or/circuituse.c index b7a4ab1b9e..70e3e97ff7 100644 --- a/src/core/or/circuituse.c +++ b/src/core/or/circuituse.c @@ -35,6 +35,7 @@ #include "core/or/circuitlist.h" #include "core/or/circuitstats.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/connection_edge.h" #include "core/or/policies.h" #include "feature/client/addressmap.h" @@ -1419,6 +1420,11 @@ circuit_detach_stream(circuit_t *circ, edge_connection_t *conn) if (circ->purpose == 
CIRCUIT_PURPOSE_S_REND_JOINED) { hs_dec_rdv_stream_counter(origin_circ); } + + /* If there are no more streams on this circ, tell circpad */ + if (!origin_circ->p_streams) + circpad_machine_event_circ_has_no_streams(origin_circ); + return; } } else { @@ -2586,6 +2592,12 @@ link_apconn_to_circ(entry_connection_t *apconn, origin_circuit_t *circ, /* add it into the linked list of streams on this circuit */ log_debug(LD_APP|LD_CIRC, "attaching new conn to circ. n_circ_id %u.", (unsigned)circ->base_.n_circ_id); + + /* If this is the first stream on this circuit, tell circpad + * that streams are attached */ + if (!circ->p_streams) + circpad_machine_event_circ_has_streams(circ); + /* reset it, so we can measure circ timeouts */ ENTRY_TO_CONN(apconn)->timestamp_last_read_allowed = time(NULL); ENTRY_TO_EDGE_CONN(apconn)->next_stream = circ->p_streams; @@ -3064,6 +3076,8 @@ circuit_change_purpose(circuit_t *circ, uint8_t new_purpose) if (CIRCUIT_IS_ORIGIN(circ)) { control_event_circuit_purpose_changed(TO_ORIGIN_CIRCUIT(circ), old_purpose); + + circpad_machine_event_circ_purpose_changed(TO_ORIGIN_CIRCUIT(circ)); } } diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c index 93383a4e01..6b9ed0f211 100644 --- a/src/core/or/connection_edge.c +++ b/src/core/or/connection_edge.c @@ -67,6 +67,7 @@ #include "core/or/circuitbuild.h" #include "core/or/circuitlist.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/connection_edge.h" #include "core/or/connection_or.h" #include "core/or/policies.h" @@ -3712,6 +3713,10 @@ handle_hs_exit_conn(circuit_t *circ, edge_connection_t *conn) /* Link the circuit and the connection crypt path. 
*/ conn->cpath_layer = origin_circ->cpath->prev; + /* If this is the first stream on this circuit, tell circpad */ + if (!origin_circ->p_streams) + circpad_machine_event_circ_has_streams(origin_circ); + /* Add it into the linked list of p_streams on this circuit */ conn->next_stream = origin_circ->p_streams; origin_circ->p_streams = conn; diff --git a/src/core/or/or.h b/src/core/or/or.h index ca373d8ed5..bf5e3957ad 100644 --- a/src/core/or/or.h +++ b/src/core/or/or.h @@ -207,6 +207,9 @@ struct curve25519_public_key_t; #define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39 #define RELAY_COMMAND_INTRODUCE_ACK 40 +#define RELAY_COMMAND_PADDING_NEGOTIATE 41 +#define RELAY_COMMAND_PADDING_NEGOTIATED 42 + /* Reasons why an OR connection is closed. */ #define END_OR_CONN_REASON_DONE 1 #define END_OR_CONN_REASON_REFUSED 2 /* connection refused */ @@ -836,6 +839,10 @@ typedef struct protover_summary_flags_t { * service rendezvous point supporting version 3 as seen in proposal 224. * This requires HSRend=2. */ unsigned int supports_v3_rendezvous_point: 1; + + /** True iff this router has a protocol list that allows clients to + * negotiate link-level padding. Requires Padding>=1. */ + unsigned int supports_padding : 1; } protover_summary_flags_t; typedef struct routerinfo_t routerinfo_t; diff --git a/src/core/or/origin_circuit_st.h b/src/core/or/origin_circuit_st.h index 26cdf590f1..921076c1b9 100644 --- a/src/core/or/origin_circuit_st.h +++ b/src/core/or/origin_circuit_st.h @@ -161,6 +161,10 @@ struct origin_circuit_t { * connections to this circuit. */ unsigned int unusable_for_new_conns : 1; + /* If this flag is set (due to padding negotiation failure), we should + * not try to negotiate further circuit padding. 
*/ + unsigned padding_negotiation_failed : 1; + /** * Tristate variable to guard against pathbias miscounting * due to circuit purpose transitions changing the decision diff --git a/src/core/or/protover.c b/src/core/or/protover.c index e80fbfae81..c0c09c9d17 100644 --- a/src/core/or/protover.c +++ b/src/core/or/protover.c @@ -39,6 +39,9 @@ static int protocol_list_contains(const smartlist_t *protos, static const struct { protocol_type_t protover_type; const char *name; +/* If you add a new protocol here, you probably also want to add + * parsing for it in routerstatus_parse_entry_from_string() so that + * it is set in routerstatus_t */ } PROTOCOL_NAMES[] = { { PRT_LINK, "Link" }, { PRT_LINKAUTH, "LinkAuth" }, @@ -49,6 +52,7 @@ static const struct { { PRT_HSREND, "HSRend" }, { PRT_DESC, "Desc" }, { PRT_MICRODESC, "Microdesc"}, + { PRT_PADDING, "Padding"}, { PRT_CONS, "Cons" } }; @@ -396,7 +400,8 @@ protover_get_supported_protocols(void) "LinkAuth=3 " #endif "Microdesc=1-2 " - "Relay=1-2"; + "Relay=1-2 " + "Padding=1"; } /** The protocols from protover_get_supported_protocols(), as parsed into a diff --git a/src/core/or/protover.h b/src/core/or/protover.h index 7319d2f8c4..ffd4f2c18e 100644 --- a/src/core/or/protover.h +++ b/src/core/or/protover.h @@ -43,6 +43,7 @@ typedef enum protocol_type_t { PRT_DESC, PRT_MICRODESC, PRT_CONS, + PRT_PADDING, } protocol_type_t; bool protover_contains_long_protocol_names(const char *s); diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 2e92f2a55d..00c2111955 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -55,6 +55,7 @@ #include "core/or/circuitbuild.h" #include "core/or/circuitlist.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "lib/compress/compress.h" #include "app/config/config.h" #include "core/mainloop/connection.h" @@ -80,7 +81,6 @@ #include "feature/nodelist/describe.h" #include "feature/nodelist/routerlist.h" #include "core/or/scheduler.h" -#include 
"feature/stats/rephist.h" #include "core/or/cell_st.h" #include "core/or/cell_queue_st.h" @@ -293,7 +293,9 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ, return 0; } - /* not recognized. pass it on. */ + /* not recognized. inform circpad and pass it on. */ + circpad_deliver_unrecognized_cell_events(circ, cell_direction); + if (cell_direction == CELL_DIRECTION_OUT) { cell->circ_id = circ->n_circ_id; /* switch it */ chan = circ->n_chan; @@ -353,11 +355,11 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ, * - Encrypt it to the right layer * - Append it to the appropriate cell_queue on <b>circ</b>. */ -static int -circuit_package_relay_cell(cell_t *cell, circuit_t *circ, +MOCK_IMPL(int, +circuit_package_relay_cell, (cell_t *cell, circuit_t *circ, cell_direction_t cell_direction, crypt_path_t *layer_hint, streamid_t on_stream, - const char *filename, int lineno) + const char *filename, int lineno)) { channel_t *chan; /* where to send the cell */ @@ -524,6 +526,8 @@ relay_command_to_string(uint8_t command) case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK"; case RELAY_COMMAND_EXTEND2: return "EXTEND2"; case RELAY_COMMAND_EXTENDED2: return "EXTENDED2"; + case RELAY_COMMAND_PADDING_NEGOTIATE: return "PADDING_NEGOTIATE"; + case RELAY_COMMAND_PADDING_NEGOTIATED: return "PADDING_NEGOTIATED"; default: tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u", (unsigned)command); @@ -577,8 +581,8 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ, log_debug(LD_OR,"delivering %d cell %s.", relay_command, cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); - if (relay_command == RELAY_COMMAND_DROP) - rep_hist_padding_count_write(PADDING_TYPE_DROP); + /* Tell circpad we're sending a relay cell */ + circpad_deliver_sent_relay_cell_events(circ, relay_command); /* If we are sending an END cell and this circuit is used for a tunneled * directory request, advance its state. 
*/ @@ -602,7 +606,9 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ, * one of them. Don't worry about the conn protocol version: * append_cell_to_circuit_queue will fix it up. */ cell.command = CELL_RELAY_EARLY; - --origin_circ->remaining_relay_early_cells; + /* If we're out of relay early cells, tell circpad */ + if (--origin_circ->remaining_relay_early_cells == 0) + circpad_machine_event_circ_has_no_relay_early(origin_circ); log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.", (int)origin_circ->remaining_relay_early_cells); /* Memorize the command that is sent as RELAY_EARLY cell; helps debug @@ -1481,9 +1487,11 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, } } + /* Tell circpad that we've received a recognized cell */ + circpad_deliver_recognized_relay_cell_events(circ, rh.command, layer_hint); + /* either conn is NULL, in which case we've got a control cell, or else * conn points to the recognized stream. */ - if (conn && !connection_state_is_open(TO_CONN(conn))) { if (conn->base_.type == CONN_TYPE_EXIT && (conn->base_.state == EXIT_CONN_STATE_CONNECTING || @@ -1504,8 +1512,14 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, switch (rh.command) { case RELAY_COMMAND_DROP: - rep_hist_padding_count_read(PADDING_TYPE_DROP); -// log_info(domain,"Got a relay-level padding cell. 
Dropping."); + /* Already examined in circpad_deliver_recognized_relay_cell_events */ + return 0; + case RELAY_COMMAND_PADDING_NEGOTIATE: + circpad_handle_padding_negotiate(circ, cell); + return 0; + case RELAY_COMMAND_PADDING_NEGOTIATED: + if (circpad_handle_padding_negotiated(circ, cell, layer_hint) == 0) + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length); return 0; case RELAY_COMMAND_BEGIN: case RELAY_COMMAND_BEGIN_DIR: diff --git a/src/core/or/relay.h b/src/core/or/relay.h index db7f17b96c..e84727e373 100644 --- a/src/core/or/relay.h +++ b/src/core/or/relay.h @@ -78,6 +78,11 @@ void destroy_cell_queue_append(destroy_cell_queue_t *queue, void channel_unlink_all_circuits(channel_t *chan, smartlist_t *detached_out); MOCK_DECL(int, channel_flush_from_first_active_circuit, (channel_t *chan, int max)); +MOCK_DECL(int, circuit_package_relay_cell, (cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno)); + void update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction, const char *file, int lineno); #define update_circuit_on_cmux(circ, direction) \ diff --git a/src/core/or/versions.c b/src/core/or/versions.c index 7bd1f5899f..736313a9cd 100644 --- a/src/core/or/versions.c +++ b/src/core/or/versions.c @@ -448,6 +448,8 @@ memoize_protover_summary(protover_summary_flags_t *out, out->supports_v3_rendezvous_point = protocol_list_supports_protocol(protocols, PRT_HSREND, PROTOVER_HS_RENDEZVOUS_POINT_V3); + out->supports_padding = + protocol_list_supports_protocol(protocols, PRT_PADDING, 1); protover_summary_flags_t *new_cached = tor_memdup(out, sizeof(*out)); cached = strmap_set(protover_summary_map, protocols, new_cached); diff --git a/src/feature/hibernate/hibernate.c b/src/feature/hibernate/hibernate.c index feeb3d92ef..f10a45f4ae 100644 --- a/src/feature/hibernate/hibernate.c +++ b/src/feature/hibernate/hibernate.c @@ -37,6 +37,7 @@ hibernating, phase 2: 
#include "core/or/connection_or.h" #include "feature/control/control.h" #include "lib/crypt_ops/crypto_rand.h" +#include "lib/defs/time.h" #include "feature/hibernate/hibernate.h" #include "core/mainloop/mainloop.h" #include "feature/relay/router.h" @@ -832,8 +833,6 @@ hibernate_soft_limit_reached(void) return get_accounting_bytes() >= soft_limit; } -#define TOR_USEC_PER_SEC (1000000) - /** Called when we get a SIGINT, or when bandwidth soft limit is * reached. Puts us into "loose hibernation": we don't accept new * connections, but we continue handling old ones. */ diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c index e1063a0eac..b9c142787a 100644 --- a/src/feature/nodelist/networkstatus.c +++ b/src/feature/nodelist/networkstatus.c @@ -44,6 +44,7 @@ #include "core/mainloop/netstatus.h" #include "core/or/channel.h" #include "core/or/channelpadding.h" +#include "core/or/circuitpadding.h" #include "core/or/circuitmux.h" #include "core/or/circuitmux_ewma.h" #include "core/or/circuitstats.h" @@ -2116,6 +2117,7 @@ networkstatus_set_current_consensus(const char *consensus, circuit_build_times_new_consensus_params( get_circuit_build_times_mutable(), c); channelpadding_new_consensus_params(c); + circpad_new_consensus_params(c); } /* Reset the failure count only if this consensus is actually valid. */ diff --git a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c index d94e73f48f..33601fe1fa 100644 --- a/src/feature/nodelist/nodelist.c +++ b/src/feature/nodelist/nodelist.c @@ -1106,7 +1106,7 @@ node_ed25519_id_matches(const node_t *node, const ed25519_public_key_t *id) /** Dummy object that should be unreturnable. Used to ensure that * node_get_protover_summary_flags() always returns non-NULL. */ static const protover_summary_flags_t zero_protover_flags = { - 0,0,0,0,0,0,0 + 0,0,0,0,0,0,0,0 }; /** Return the protover_summary_flags for a given node. 
*/ @@ -2350,7 +2350,7 @@ compute_frac_paths_available(const networkstatus_t *consensus, const int authdir = authdir_mode_v3(options); count_usable_descriptors(num_present_out, num_usable_out, - mid, consensus, now, NULL, + mid, consensus, now, options->MiddleNodes, USABLE_DESCRIPTOR_ALL); log_debug(LD_NET, "%s: %d present, %d usable", diff --git a/src/feature/nodelist/routerlist.c b/src/feature/nodelist/routerlist.c index b4d56459df..c8a658414b 100644 --- a/src/feature/nodelist/routerlist.c +++ b/src/feature/nodelist/routerlist.c @@ -3221,6 +3221,8 @@ refresh_all_country_info(void) routerset_refresh_countries(options->EntryNodes); if (options->ExitNodes) routerset_refresh_countries(options->ExitNodes); + if (options->MiddleNodes) + routerset_refresh_countries(options->MiddleNodes); if (options->ExcludeNodes) routerset_refresh_countries(options->ExcludeNodes); if (options->ExcludeExitNodes) diff --git a/src/lib/crypt_ops/crypto_rand.c b/src/lib/crypt_ops/crypto_rand.c index cffd0610f3..d148dfb3a8 100644 --- a/src/lib/crypt_ops/crypto_rand.c +++ b/src/lib/crypt_ops/crypto_rand.c @@ -529,6 +529,17 @@ crypto_rand_unmocked(char *to, size_t n) } /** + * Draw an unsigned 32-bit integer uniformly at random. + */ +uint32_t +crypto_rand_u32(void) +{ + uint32_t rand; + crypto_rand((void*)&rand, sizeof(rand)); + return rand; +} + +/** * Return a pseudorandom integer, chosen uniformly from the values * between 0 and <b>max</b>-1 inclusive. <b>max</b> must be between 1 and * INT_MAX+1, inclusive. 
diff --git a/src/lib/crypt_ops/crypto_rand.h b/src/lib/crypt_ops/crypto_rand.h index 0c538d81ac..874fcd4d08 100644 --- a/src/lib/crypt_ops/crypto_rand.h +++ b/src/lib/crypt_ops/crypto_rand.h @@ -27,6 +27,7 @@ int crypto_rand_int(unsigned int max); int crypto_rand_int_range(unsigned int min, unsigned int max); uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max); time_t crypto_rand_time_range(time_t min, time_t max); +uint32_t crypto_rand_u32(void); uint64_t crypto_rand_uint64(uint64_t max); double crypto_rand_double(void); struct tor_weak_rng_t; diff --git a/src/lib/defs/include.am b/src/lib/defs/include.am index 48ee7f29fc..6a7f9114ea 100644 --- a/src/lib/defs/include.am +++ b/src/lib/defs/include.am @@ -2,4 +2,5 @@ noinst_HEADERS += \ src/lib/defs/dh_sizes.h \ src/lib/defs/digest_sizes.h \ + src/lib/defs/time.h \ src/lib/defs/x25519_sizes.h diff --git a/src/lib/defs/time.h b/src/lib/defs/time.h new file mode 100644 index 0000000000..762b23feab --- /dev/null +++ b/src/lib/defs/time.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2001, Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_TIME_DEFS_H +#define TOR_TIME_DEFS_H + +/** + * \file time.h + * + * \brief Definitions for timing-related constants. 
+ **/ + +/** How many microseconds per second */ +#define TOR_USEC_PER_SEC (1000000) +/** How many nanoseconds per microsecond */ +#define TOR_NSEC_PER_USEC (1000) +/* How many nanoseconds per millisecond */ +#define TOR_NSEC_PER_MSEC (1000*1000) + +#endif diff --git a/src/lib/math/.may_include b/src/lib/math/.may_include index 1fd26864dc..f8bc264a5f 100644 --- a/src/lib/math/.may_include +++ b/src/lib/math/.may_include @@ -3,3 +3,5 @@ orconfig.h lib/cc/*.h lib/log/*.h lib/math/*.h +lib/testsupport/*.h +lib/crypt_ops/*.h diff --git a/src/lib/math/fp.c b/src/lib/math/fp.c index d5989db637..57082fa468 100644 --- a/src/lib/math/fp.c +++ b/src/lib/math/fp.c @@ -117,3 +117,28 @@ ENABLE_GCC_WARNING(double-promotion) ENABLE_GCC_WARNING(float-conversion) #endif } + +/* isinf() wrapper for tor */ +int +tor_isinf(double x) +{ + /* Same as above, work around the "double promotion" warnings */ +#if defined(MINGW_ANY) && GCC_VERSION >= 409 +#define PROBLEMATIC_FLOAT_CONVERSION_WARNING +DISABLE_GCC_WARNING(float-conversion) +#endif /* defined(MINGW_ANY) && GCC_VERSION >= 409 */ +#if defined(__clang__) +#if __has_warning("-Wdouble-promotion") +#define PROBLEMATIC_DOUBLE_PROMOTION_WARNING +DISABLE_GCC_WARNING(double-promotion) +#endif +#endif /* defined(__clang__) */ + return isinf(x); +#ifdef PROBLEMATIC_DOUBLE_PROMOTION_WARNING +ENABLE_GCC_WARNING(double-promotion) +#endif +#ifdef PROBLEMATIC_FLOAT_CONVERSION_WARNING +ENABLE_GCC_WARNING(float-conversion) +#endif +} + diff --git a/src/lib/math/fp.h b/src/lib/math/fp.h index e27b8f8d80..ddf3ed24d6 100644 --- a/src/lib/math/fp.h +++ b/src/lib/math/fp.h @@ -19,5 +19,6 @@ double tor_mathlog(double d) ATTR_CONST; long tor_lround(double d) ATTR_CONST; int64_t tor_llround(double d) ATTR_CONST; int64_t clamp_double_to_int64(double number); +int tor_isinf(double x); #endif diff --git a/src/lib/math/include.am b/src/lib/math/include.am index b088b3f3cc..6d65ce90a7 100644 --- a/src/lib/math/include.am +++ b/src/lib/math/include.am @@ -7,7 
+7,8 @@ endif src_lib_libtor_math_a_SOURCES = \ src/lib/math/fp.c \ - src/lib/math/laplace.c + src/lib/math/laplace.c \ + src/lib/math/prob_distr.c src_lib_libtor_math_testing_a_SOURCES = \ @@ -17,4 +18,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) noinst_HEADERS += \ src/lib/math/fp.h \ - src/lib/math/laplace.h + src/lib/math/laplace.h \ + src/lib/math/prob_distr.h diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c new file mode 100644 index 0000000000..4263ba2074 --- /dev/null +++ b/src/lib/math/prob_distr.c @@ -0,0 +1,1717 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file prob_distr.c + * + * \brief + * Implements various probability distributions. + * Almost all code is courtesy of Riastradh. + * + * \details + * Here are some details that might help you understand this file: + * + * - Throughout this file, `eps' means the largest relative error of a + * correctly rounded floating-point operation, which in binary64 + * floating-point arithmetic is 2^-53. Here the relative error of a + * true value x from a computed value y is |x - y|/|x|. This + * definition of epsilon is conventional for numerical analysts when + * writing error analyses. (If your libm doesn't provide correctly + * rounded exp and log, their relative error is usually below 2*2^-53 + * and probably closer to 1.1*2^-53 instead.) + * + * The C constant DBL_EPSILON is actually twice this, and should + * perhaps rather be named ulp(1) -- that is, it is the distance from + * 1 to the next greater floating-point number, which is usually of + * more interest to programmers and hardware engineers. 
+ * + * Since this file is concerned mainly with error bounds rather than + * with low-level bit-hacking of floating-point numbers, we adopt the + * numerical analysts' definition in the comments, though we do use + * DBL_EPSILON in a handful of places where it is convenient to use + * some function of eps = DBL_EPSILON/2 in a case analysis. + * + * - In various functions (e.g. sample_log_logistic()) we jump through hoops so + * that we can use reals closer to 0 than closer to 1, since we achieve much + * greater accuracy for floating point numbers near 0. In particular, we can + * represent differences as small as 10^-300 for numbers near 0, but of no + * less than 10^-16 for numbers near 1. + **/ + +#define PROB_DISTR_PRIVATE + +#include "orconfig.h" + +#include "lib/math/prob_distr.h" + +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/cc/ctassert.h" + +#include <float.h> +#include <math.h> +#include <stddef.h> + +/** Validators for downcasting macros below */ +#define validate_container_of(PTR, TYPE, FIELD) \ + (0 * sizeof((PTR) - &((TYPE *)(((char *)(PTR)) - \ + offsetof(TYPE, FIELD)))->FIELD)) +#define validate_const_container_of(PTR, TYPE, FIELD) \ + (0 * sizeof((PTR) - &((const TYPE *)(((const char *)(PTR)) - \ + offsetof(TYPE, FIELD)))->FIELD)) +/** Downcasting macro */ +#define container_of(PTR, TYPE, FIELD) \ + ((TYPE *)(((char *)(PTR)) - offsetof(TYPE, FIELD)) \ + + validate_container_of(PTR, TYPE, FIELD)) +/** Constified downcasting macro */ +#define const_container_of(PTR, TYPE, FIELD) \ + ((const TYPE *)(((const char *)(PTR)) - offsetof(TYPE, FIELD)) \ + + validate_const_container_of(PTR, TYPE, FIELD)) + +/** + * Count number of one bits in 32-bit word. + */ +static unsigned +bitcount32(uint32_t x) +{ + + /* Count two-bit groups. */ + x -= (x >> 1) & UINT32_C(0x55555555); + + /* Count four-bit groups. */ + x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333)); + + /* Count eight-bit groups. 
*/ + x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f); + + /* Sum all eight-bit groups, and extract the sum. */ + return (x * UINT32_C(0x01010101)) >> 24; +} + +/** + * Count leading zeros in 32-bit word. + */ +static unsigned +clz32(uint32_t x) +{ + + /* Round up to a power of two. */ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + /* Subtract count of one bits from 32. */ + return (32 - bitcount32(x)); +} + +/* + * Some lemmas that will be used throughout this file to prove various error + * bounds: + * + * Lemma 1. If |d| <= 1/2, then 1/(1 + d) <= 2. + * + * Proof. If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1. + * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2. QED. + * + * Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b, + * then b = a*(1 + e) for |e| <= 2|d' - d|. + * + * Proof. |a - b|/|a| + * = |a - a*(1 + d)/(1 + d')|/|a| + * = |1 - (1 + d)/(1 + d')| + * = |(1 + d' - 1 - d)/(1 + d')| + * = |(d' - d)/(1 + d')| + * <= 2|d' - d|, by Lemma 1, + * + * QED. + * + * Lemma 3. For |d|, |d'| < 1/4, + * + * |log((1 + d)/(1 + d'))| <= 4|d - d'|. + * + * Proof. Write + * + * log((1 + d)/(1 + d')) + * = log(1 + (1 + d)/(1 + d') - 1) + * = log(1 + (1 + d - 1 - d')/(1 + d') + * = log(1 + (d - d')/(1 + d')). + * + * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor + * series of log(1 + x) converges absolutely for (d - d')/(1 + d'), + * and thus we have + * + * |log(1 + (d - d')/(1 + d'))| + * = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n| + * <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n + * <= \sum_{n=1}^\infty |2(d' - d)|^n/n + * <= \sum_{n=1}^\infty |2(d' - d)|^n + * = 1/(1 - |2(d' - d)|) + * <= 4|d' - d|, + * + * QED. + * + * Lemma 4. If 1/e <= 1 + x <= e, then + * + * log(1 + (1 + d) x) = (1 + d') log(1 + x) + * + * for |d'| < 8|d|. + * + * Proof. 
Write + * + * log(1 + (1 + d) x) + * = log(1 + x + x*d) + * = log((1 + x) (1 + x + x*d)/(1 + x)) + * = log(1 + x) + log((1 + x + x*d)/(1 + x)) + * = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)). + * + * The relative error is bounded by + * + * |log((1 + x + x*d)/(1 + x))/log(1 + x)| + * <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3, + * = 4|x*d|/|log(1 + x)| + * < 8|d|, + * + * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2. QED. + */ + +/** + * Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x). + * Maps a log-odds-space probability in [-\infty, +\infty] into a direct-space + * probability in [0,1]. Inverse of logit. + * + * Ill-conditioned for large x; the identity logistic(-x) = 1 - + * logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may + * help to rearrange a computation. + * + * This implementation gives relative error bounded by 7 eps. + */ +STATIC double +logistic(double x) +{ + if (x <= log(DBL_EPSILON/2)) { + /* + * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case + * we will approximate the logistic() function with e^x because the + * relative error is less than eps. Here is a calculation of the + * relative error between the logistic() function and e^x and a proof + * that it's less than eps: + * + * |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)| + * <= |1 - 1/(1 + e^x)|*|1 + e^x| + * = |e^x/(1 + e^x)|*|1 + e^x| + * = |e^x| + * <= eps. + */ + return exp(x); /* return e^x */ + } else if (x <= -log(DBL_EPSILON/2)) { + /* + * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 + + * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we + * also have 0 < 1/(1 + e^{-x}) < 1. 
Thus, if exp has + * relative error d0, + has relative error d1, and / + * has relative error d2, then we get + * + * (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)] + * = (1 + d0)/[1 + e^{-x} + d0 e^{-x} + * + d1 + d1 e^{-x} + d0 d1 e^{-x}] + * = (1 + d0)/[(1 + e^{-x}) + * * (1 + d0 e^{-x}/(1 + e^{-x}) + * + d1/(1 + e^{-x}) + * + d0 d1 e^{-x}/(1 + e^{-x}))]. + * = (1 + d0)/[(1 + e^{-x})(1 + d')] + * = [1/(1 + e^{-x})] (1 + d0)/(1 + d') + * + * where + * + * d' = d0 e^{-x}/(1 + e^{-x}) + * + d1/(1 + e^{-x}) + * + d0 d1 e^{-x}/(1 + e^{-x}). + * + * By Lemma 2 this relative error is bounded by + * + * 2|d0 - d'| + * = 2|d0 - d0 e^{-x}/(1 + e^{-x}) + * - d1/(1 + e^{-x}) + * - d0 d1 e^{-x}/(1 + e^{-x})| + * <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})| + * + 2|d1/(1 + e^{-x})| + * + 2|d0 d1 e^{-x}/(1 + e^{-x})| + * <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1| + * <= 4|d0| + 2|d1| + 2|d0 d1| + * <= 6 eps + 2 eps^2. + */ + return 1/(1 + exp(-x)); + } else { + /* + * e^{-x} <= eps, so the relative error of 1 from 1/(1 + * + e^{-x}) is + * + * |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})| + * = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})| + * = |e^{-x}| + * <= eps. + * + * This computation avoids an intermediate overflow + * exception, although the effect on the result is + * harmless. + * + * XXX Should maybe raise inexact here. + */ + return 1; + } +} + +/** + * Compute the logit function: log p/(1 - p). Defined on [0,1]. Maps + * a direct-space probability in [0,1] to a log-odds-space probability + * in [-\infty, +\infty]. Inverse of logistic. + * + * Ill-conditioned near 1/2 and 1; the identity logit(1 - p) = + * -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help + * to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)]. + * + * This implementation gives relative error bounded by 10 eps. 
+ */ +STATIC double +logit(double p) +{ + + /* logistic(-1) <= p <= logistic(+1) */ + if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) { + /* + * For inputs near 1/2, we want to compute log1p(near + * 0) rather than log(near 1), so write this as: + * + * log(p/(1 - p)) = -log((1 - p)/p) + * = -log(1 + (1 - p)/p - 1) + * = -log(1 + (1 - p - p)/p) + * = -log(1 + (1 - 2p)/p). + * + * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point + * evaluation of 1 - 2p is exact; the only error arises + * from division and log1p. First, note that if + * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies + * in the bounds of Lemma 4. + * + * If division has relative error d0 and log1p has + * relative error d1, the outcome is + * + * -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p) + * = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p) + * = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p). + * + * where |d'| < 8|d0| by Lemma 4. The relative error + * is then bounded by + * + * |d1 + d' + d1 d'| + * <= |d1| + 8|d0| + 8|d1 d0| + * <= 9 eps + 8 eps^2. + */ + return -log1p((1 - 2*p)/p); + } else { + /* + * For inputs near 0, although 1 - p may be rounded to + * 1, it doesn't matter much because the magnitude of + * the result is so much larger. For inputs near 1, we + * can compute 1 - p exactly, although the precision on + * the input is limited so we won't ever get more than + * about 700 for the output. + * + * If - has relative error d0, / has relative error d1, + * and log has relative error d2, then + * + * (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)]) + * = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))] + * = log(p/(1 - p)) + d2 log(p/(1 - p)) + * + (1 + d2) log((1 + d0)/(1 + d1)) + * = log(p/(1 - p))*[1 + d2 + + * + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))] + * + * Since 0 <= p < logistic(-1) or logistic(+1) < p <= + * 1, we have |log(p/(1 - p))| > 1. 
Hence this error + * is bounded by + * + * |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))| + * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1)) + * / log(p/(1 - p))| + * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))| + * <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3, + * <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0| + * <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0| + * <= 9 eps + 8 eps^2. + */ + return log(p/(1 - p)); + } +} + +/** + * Compute the logit function, translated in input by 1/2: logithalf(p) + * = logit(1/2 + p). Defined on [-1/2, 1/2]. Inverse of logistichalf. + * + * Ill-conditioned near +/-1/2. If |p0| > 1/2 - 1/(1 + e), it may be + * better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead. + * This implementation gives relative error bounded by 34 eps. + */ +STATIC double +logithalf(double p0) +{ + + if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) { + /* + * logit(1/2 + p0) + * = log((1/2 + p0)/(1 - (1/2 + p0))) + * = log((1/2 + p0)/(1/2 - p0)) + * = log(1 + (1/2 + p0)/(1/2 - p0) - 1) + * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0)) + * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0)) + * = log(1 + 2 p0/(1/2 - p0)) + * + * If the error of subtraction is d0, the error of + * division is d1, and the error of log1p is d2, then + * what we compute is + * + * (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)]) + * = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0)) + * = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0)) + * = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)), + * + * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and + * |d''| < 8|d'| < 32 eps by Lemma 4 since + * + * 1/e <= 1 + 2*p0/(1/2 - p0) <= e + * + * when |p0| <= 1/2 - 1/(1 + e). Hence the relative + * error is bounded by + * + * |d2 + d'' + d2 d''| + * <= |d2| + |d''| + |d2 d''| + * <= |d1| + 32 |d0| + 32 |d1 d0| + * <= 33 eps + 32 eps^2. + */ + return log1p(2*p0/(0.5 - p0)); + } else { + /* + * We have a choice of computing logit(1/2 + p0) or + * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0). 
It + * doesn't matter which way we do this: either way, + * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference + * are computed exactly. So let's do the one that + * skips the final negation. + * + * The result is + * + * (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)]) + * = (1 + d1) (1 + log((1 + d0)/(1 + d2)) + * / log((1/2 + p0)/(1/2 - p0))) + * * log((1/2 + p0)/(1/2 - p0)) + * = (1 + d') log((1/2 + p0)/(1/2 - p0)) + * = (1 + d') logit(1/2 + p0) + * + * where + * + * d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0) + * + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0). + * + * For |p| > 1/2 - 1/(1 + e), logit(1/2 + p0) > 1. + * Provided |d0|, |d2| < 1/4, by Lemma 3 we have + * + * |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|. + * + * Hence the relative error is bounded by + * + * |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2| + * <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2| + * <= 9 eps + 8 eps^2. + */ + return log((0.5 + p0)/(0.5 - p0)); + } +} + +/* + * The following random_uniform_01 is tailored for IEEE 754 binary64 + * floating-point or smaller. It can be adapted to larger + * floating-point formats like i387 80-bit or IEEE 754 binary128, but + * it may require sampling more bits. + */ +CTASSERT(FLT_RADIX == 2); +CTASSERT(-DBL_MIN_EXP <= 1021); +CTASSERT(DBL_MANT_DIG <= 53); + +/** + * Draw a floating-point number in [0, 1] with uniform distribution. + * + * Note that the probability of returning 0 is less than 2^-1074, so + * callers need not check for it. However, callers that cannot handle + * rounding to 1 must deal with that, because it occurs with + * probability 2^-54, which is small but nonnegligible. + */ +STATIC double +random_uniform_01(void) +{ + uint32_t z, x, hi, lo; + double s; + + /* + * Draw an exponent, geometrically distributed, but give up if + * we get a run of more than 1088 zeros, which really means the + * system is broken. + */ + z = 0; + while ((x = crypto_rand_u32()) == 0) { + if (z >= 1088) + /* Your bit sampler is broken. Go home. 
*/ + return 0; + z += 32; + } + z += clz32(x); + + /* + * Pick 32-bit halves of an odd normalized significand. + * Picking it odd breaks ties in the subsequent rounding, which + * occur only with measure zero in the uniform distribution on + * [0, 1]. + */ + hi = crypto_rand_u32() | UINT32_C(0x80000000); + lo = crypto_rand_u32() | UINT32_C(0x00000001); + + /* Round to nearest scaled significand in [2^63, 2^64]. */ + s = hi*(double)4294967296 + lo; + + /* Rescale into [1/2, 1] and apply exponent in one swell foop. */ + return s * ldexp(1, -(64 + z)); +} + +/*******************************************************************/ + +/* Functions for specific probability distributions start here: */ + +/* + * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty) + * + * This is the uniform distribution on [0,1] mapped into log-odds + * space, scaled by sigma and translated by mu. + * + * pdf(x) = e^{-(x - mu)/sigma} / (sigma (1 + e^{-(x - mu)/sigma})^2) + * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma) + * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma) = logistic(-(x - mu)/sigma) + * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p) + * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p) + */ + +/** + * Compute the CDF of the Logistic(mu, sigma) distribution: the + * logistic function. Well-conditioned for negative inputs and small + * positive inputs; ill-conditioned for large positive inputs. + */ +STATIC double +cdf_logistic(double x, double mu, double sigma) +{ + return logistic((x - mu)/sigma); +} + +/** + * Compute the SF of the Logistic(mu, sigma) distribution: the logistic + * function reflected over the y axis. Well-conditioned for positive + * inputs and small negative inputs; ill-conditioned for large negative + * inputs. 
+ */ +STATIC double +sf_logistic(double x, double mu, double sigma) +{ + return logistic(-(x - mu)/sigma); +} + +/** + * Compute the inverse of the CDF of the Logistic(mu, sigma) + * distribution: the logit function. Well-conditioned near 0; + * ill-conditioned near 1/2 and 1. + */ +STATIC double +icdf_logistic(double p, double mu, double sigma) +{ + return mu + sigma*logit(p); +} + +/** + * Compute the inverse of the SF of the Logistic(mu, sigma) + * distribution: the -logit function. Well-conditioned near 0; + * ill-conditioned near 1/2 and 1. + */ +STATIC double +isf_logistic(double p, double mu, double sigma) +{ + return mu - sigma*logit(p); +} + +/* + * LogLogistic(alpha, beta) distribution, supported on (0, +\infty). + * + * This is the uniform distribution on [0,1] mapped into odds space, + * scaled by positive alpha and shaped by positive beta. + * + * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample. + * (Name arises because the pdf has LogLogistic(x; alpha, beta) = + * Logistic(log x; log alpha, 1/beta) and mathematicians got their + * covariance contravariant.) + * + * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2 + * = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} / + * (1 + (x/e^mu)^{1/sigma})^2 + * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma}) + * = 1/(1 + (e^{log x}/e^mu)^{-1/sigma}) + * = 1/(1 + (e^{log x - mu})^{-1/sigma}) + * = 1/(1 + e^{-(log x - mu)/sigma}) + * = logistic((log x - mu)/sigma) + * = logistic((log x - log alpha)/(1/beta)) + * sf(x) = 1 - 1/(1 + (x/alpha)^-beta) + * = (x/alpha)^-beta/(1 + (x/alpha)^-beta) + * = 1/((x/alpha)^beta + 1) + * = 1/(1 + (x/alpha)^beta) + * icdf(p) = alpha (p/(1 - p))^{1/beta} + * = alpha e^{logit(p)/beta} + * = e^{mu + sigma logit(p)} + * isf(p) = alpha ((1 - p)/p)^{1/beta} + * = alpha e^{-logit(p)/beta} + * = e^{mu - sigma logit(p)} + */ + +/** + * Compute the CDF of the LogLogistic(alpha, beta) distribution. 
+ * Well-conditioned for all x and alpha, and the condition number + * + * -beta/[1 + (x/alpha)^{-beta}] + * + * grows linearly with beta. + * + * Loosely, the relative error of this implementation is bounded by + * + * 4 eps + 2 eps^2 + O(beta eps), + * + * so don't bother trying this for beta anywhere near as large as + * 1/eps, around which point it levels off at 1. + */ +STATIC double +cdf_log_logistic(double x, double alpha, double beta) +{ + /* + * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and + * d3, of the final quotient. The exponentiation gives + * + * ((1 + d0) x/alpha)^{-beta} + * = (x/alpha)^{-beta} (1 + d0)^{-beta} + * = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1) + * = (x/alpha)^{-beta} (1 + d') + * + * where d' = (1 + d0)^{-beta} - 1. If y = (x/alpha)^{-beta}, + * the denominator is + * + * (1 + d2) (1 + (1 + d1) (1 + d') y) + * = (1 + d2) (1 + y + (d1 + d' + d1 d') y) + * = 1 + y + (1 + d2) (d1 + d' + d1 d') y + * = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y)) + * = (1 + y) (1 + d''), + * + * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y). The + * final result is + * + * (1 + d3) / [(1 + d2) (1 + d'') (1 + y)] + * = (1 + d''') / (1 + y) + * + * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2 + * (which may not be the case for very large beta). This + * relative error is therefore bounded by + * + * |d'''| + * <= 2|d3 - d''| + * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)| + * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')| + * = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'| + * <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'| + * + 2|d2 d1 d'| + * <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|. + * + * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps, + * until it levels off at 1. + */ + return 1/(1 + pow(x/alpha, -beta)); +} + +/** + * Compute the SF of the LogLogistic(alpha, beta) distribution. 
+ * Well-conditioned for all x and alpha, and the condition number + * + * beta/[1 + (x/alpha)^beta] + * + * grows linearly with beta. + * + * Loosely, the relative error of this implementation is bounded by + * + * 4 eps + 2 eps^2 + O(beta eps) + * + * so don't bother trying this for beta anywhere near as large as + * 1/eps, beyond which point it grows unbounded. + */ +STATIC double +sf_log_logistic(double x, double alpha, double beta) +{ + /* + * The error analysis here is essentially the same as in + * cdf_log_logistic, except that rather than levelling off at + * 1, |(1 + d0)^beta - 1| grows unbounded. + */ + return 1/(1 + pow(x/alpha, beta)); +} + +/** + * Compute the inverse of the CDF of the LogLogistic(alpha, beta) + * distribution. Ill-conditioned for p near 1 and beta near 0 with + * condition number 1/[beta (1 - p)]. + */ +STATIC double +icdf_log_logistic(double p, double alpha, double beta) +{ + return alpha*pow(p/(1 - p), 1/beta); +} + +/** + * Compute the inverse of the SF of the LogLogistic(alpha, beta) + * distribution. Ill-conditioned for p near 1 and for large beta, with + * condition number -1/[beta (1 - p)]. + */ +STATIC double +isf_log_logistic(double p, double alpha, double beta) +{ + return alpha*pow((1 - p)/p, 1/beta); +} + +/* + * Weibull(lambda, k) distribution, supported on (0, +\infty). + * + * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k} + * cdf(x) = 1 - e^{-(x/lambda)^k} + * icdf(p) = lambda * (-log (1 - p))^{1/k} + * sf(x) = e^{-(x/lambda)^k} + * isf(p) = lambda * (-log p)^{1/k} + */ + +/** + * Compute the CDF of the Weibull(lambda, k) distribution. + * Well-conditioned for small x and k, and for large lambda -- + * condition number + * + * -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1] + * + * grows linearly with k, x^k, and lambda^{-k}. 
+ */ +STATIC double +cdf_weibull(double x, double lambda, double k) +{ + return -expm1(-pow(x/lambda, k)); +} + +/** + * Compute the SF of the Weibull(lambda, k) distribution. + * Well-conditioned for small x and k, and for large lambda -- + * condition number + * + * -k (x/lambda)^k + * + * grows linearly with k, x^k, and lambda^{-k}. + */ +STATIC double +sf_weibull(double x, double lambda, double k) +{ + return exp(-pow(x/lambda, k)); +} + +/** + * Compute the inverse of the CDF of the Weibull(lambda, k) + * distribution. Ill-conditioned for p near 1, and for k near 0; + * condition number is + * + * (p/(1 - p))/(k log(1 - p)). + */ +STATIC double +icdf_weibull(double p, double lambda, double k) +{ + return lambda*pow(-log1p(-p), 1/k); +} + +/** + * Compute the inverse of the SF of the Weibull(lambda, k) + * distribution. Ill-conditioned for p near 0, and for k near 0; + * condition number is + * + * 1/(k log(p)). + */ +STATIC double +isf_weibull(double p, double lambda, double k) +{ + return lambda*pow(-log(p), 1/k); +} + +/* + * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for + * nonnegative xi, or (mu, mu - sigma/xi) for negative xi. + * + * Samples: + * = mu - sigma log U, if xi = 0; + * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0, + * where U is uniform on (0,1]. + * = mu + sigma (e^{xi X} - 1)/xi, + * where X has standard exponential distribution. 
+ * + * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)} + * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi} + * = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi} + * --> 1 - e^{-(x - mu)/sigma} as xi --> 0 + * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi} + * --> e^{-(x - mu)/sigma} as xi --> 0 + * icdf(p) = mu + sigma*((1 - p)^{-xi} - 1)/xi + * = mu + sigma*expm1(-xi log1p(-p))/xi + * --> mu - sigma*log1p(-p) as xi --> 0 + * isf(p) = mu + sigma*(p^{-xi} - 1)/xi + * = mu + sigma*expm1(-xi log p)/xi + * --> mu - sigma*log p as xi --> 0 + */ + +/** + * Compute the CDF of the GeneralizedPareto(mu, sigma, xi) + * distribution. Well-conditioned everywhere. For standard + * distribution (mu=0, sigma=1), condition number + * + * (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1) + * + * is bounded by 1, attained only at x = 0. + */ +STATIC double +cdf_genpareto(double x, double mu, double sigma, double xi) +{ + double x_0 = (x - mu)/sigma; + + /* + * log(1 + xi x_0)/xi + * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n + * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n) + * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n + * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n + * = x_0 (1 - d), + * + * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n. If |xi| < + * eps/4|x_0|, then + * + * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n + * <= \sum_{n=2}^\infty (eps/4)^{n-1} + * = \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2 + * + * for any 0 < eps < 2. Thus, the relative error of x_0 from + * log(1 + xi x_0)/xi is bounded by eps. + */ + if (fabs(xi) < 1e-17/x_0) + return -expm1(-x_0); + else + return -expm1(-log1p(xi*x_0)/xi); +} + +/** + * Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. 
+ * For standard distribution (mu=0, sigma=1), ill-conditioned for xi + * near 0; condition number + * + * -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi} + * = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi} + * = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi} + * = -x/(1 + x xi) + * + * is bounded by 1/xi. + */ +STATIC double +sf_genpareto(double x, double mu, double sigma, double xi) +{ + double x_0 = (x - mu)/sigma; + + if (fabs(xi) < 1e-17/x_0) + return exp(-x_0); + else + return exp(-log1p(xi*x_0)/xi); +} + +/** + * Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, + * xi) distribution. Ill-conditioned for p near 1; condition number is + * + * xi (p/(1 - p))/(1 - (1 - p)^xi) + */ +STATIC double +icdf_genpareto(double p, double mu, double sigma, double xi) +{ + /* + * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi + * for xi near zero (note f(xi) --> -log U as xi --> 0), write + * the absolutely convergent Taylor expansion + * + * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n! + * = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n! + * = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n! + * = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n! + * = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!). + * + * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!. What do we + * lose if we discard it and use -log U as an approximation to + * f(xi)? If |xi| < eps/-4log U, then + * + * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n! + * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n! + * <= \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2, + * + * for any 0 < eps < 2. Hence, as long as |xi| < eps/-2log U, + * f(xi) = -log U (1 + d) for |d| <= eps/2. |d| is the + * relative error of f(xi) from -log U; from this bound, the + * relative error of -log U from f(xi) is at most (eps/2)/(1 - + * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 < + * eps < 1. 
Since -log U < 1000 for all U in (0, 1] in + * binary64 floating-point, we can safely cut xi off at 1e-20 < + * eps/4000 and attain <1ulp error from series truncation. + */ + if (fabs(xi) <= 1e-20) + return mu - sigma*log1p(-p); + else + return mu + sigma*expm1(-xi*log1p(-p))/xi; +} + +/** + * Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, + * xi) distribution. Ill-conditioned for p near 1; condition number is + * + * -xi/(1 - p^{-xi}) + */ +STATIC double +isf_genpareto(double p, double mu, double sigma, double xi) +{ + if (fabs(xi) <= 1e-20) + return mu - sigma*log(p); + else + return mu + sigma*expm1(-xi*log(p))/xi; +} + +/*******************************************************************/ + +/** + * Deterministic samplers, parametrized by uniform integer and (0,1] + * samples. No guarantees are made about _which_ mapping from the + * integer and (0,1] samples these use; all that is guaranteed is the + * distribution of the outputs conditioned on a uniform distribution on + * the inputs. The automatic tests in test_prob_distr.c double-check + * the particular mappings we use. + * + * Beware: Unlike random_uniform_01(), these are not guaranteed to be + * supported on all possible outputs. See Ilya Mironov, `On the + * Significance of the Least Significant Bits for Differential + * Privacy', for an example of what can go wrong if you try to use + * these to conceal information from an adversary but you expose the + * specific full-precision floating-point values. + * + * Note: None of these samplers use rejection sampling; they are all + * essentially inverse-CDF transforms with tweaks. If you were to add, + * say, a Gamma sampler with the Marsaglia-Tsang method, you would have + * to parametrize it by a potentially infinite stream of uniform (and + * perhaps normal) samples rather than a fixed number, which doesn't + * make for quite as nice automatic testing as for these. 
+ */ + +/** + * Deterministically sample from the interval [a, b], indexed by a + * uniform random floating-point number p0 in (0, 1]. + * + * Note that even if p0 is nonzero, the result may be equal to a, if + * ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, + * arrange |a| <= |b|. + */ +STATIC double +sample_uniform_interval(double p0, double a, double b) +{ + /* + * XXX Prove that the distribution is, in fact, uniform on + * [a,b], particularly around p0 = 1, or at least has very + * small deviation from uniform, quantified appropriately + * (e.g., like in Monahan 1984, or by KL divergence). It + * almost certainly does but it would be nice to quantify the + * error. + */ + if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) { + /* + * When ab < 0, (1 - t) a + t b is monotonic, since for + * a <= b it is a sum of nondecreasing functions of t, + * and for b <= a, of nonincreasing functions of t. + * Further, clearly at 0 and 1 it attains a and b, + * respectively. Hence it is bounded within [a, b]. + */ + return (1 - p0)*a + p0*b; + } else { + /* + * a + (b - a) t is monotonic -- it is obviously a + * nondecreasing function of t for a <= b. Further, it + * attains a at 0, and while it may overshoot b at 1, + * we have a + * + * Theorem. If 0 <= t < 1, then the floating-point + * evaluation of a + (b - a) t is bounded in [a, b]. + * + * Lemma 1. If 0 <= t < 1 is a floating-point number, + * then for any normal floating-point number x except + * the smallest in magnitude, |round(x*t)| < |x|. + * + * Proof. WLOG, assume x >= 0. Since the rounding + * function and t |---> x*t are nondecreasing, their + * composition t |---> round(x*t) is also + * nondecreasing, so it suffices to consider the + * largest floating-point number below 1, in particular + * t = 1 - ulp(1)/2. 
+ * + * Case I: If x is a power of two, then the next + * floating-point number below x is x - ulp(x)/2 = x - + * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t + * is a floating-point number, multiplication is exact, + * and thus round(x*t) = x*t < x. + * + * Case II: If x is not a power of two, then the + * greatest lower bound of real numbers rounded to x is + * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2, + * where T(X) is the largest power of two below x. + * Anything below this bound is rounded to a + * floating-point number smaller than x, and x*t = x*(1 + * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2 + * since T(x) < x, so round(x*t) < x*t < x. QED. + * + * Lemma 2. If x and y are subnormal, then round(x + + * y) = x + y. + * + * Proof. It is a matter of adding the significands, + * since if we treat subnormals as having an implicit + * zero bit before the `binary' point, their exponents + * are all the same. There is at most one carry/borrow + * bit, which can always be acommodated either in a + * subnormal, or, at largest, in the implicit one bit + * of a normal. + * + * Lemma 3. Let x and y be floating-point numbers. If + * round(x - y) is subnormal or zero, then it is equal + * to x - y. + * + * Proof. Case I (equal): round(x - y) = 0 iff x = y; + * hence if round(x - y) = 0, then round(x - y) = 0 = x + * - y. + * + * Case II (subnormal/subnormal): If x and y are both + * subnormal, this follows directly from Lemma 2. + * + * Case IIIa (normal/subnormal): If x is normal and y + * is subnormal, then x and y must share sign, or else + * x - y would be larger than x and thus rounded to + * normal. If s is the smallest normal positive + * floating-point number, |x| < 2s since by + * construction 2s - |y| is normal for all subnormal y. + * This means that x and y must have the same exponent, + * so the difference is the difference of significands, + * which is exact. + * + * Case IIIb (subnormal/normal): Same as case IIIa for + * -(y - x). 
+ * + * Case IV (normal/normal): If x and y are both normal, + * then they must share sign, or else x - y would be + * larger than x and thus rounded to normal. Note that + * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <= + * -|x| but -|x| is normal like x. Also, |x|/2 < |y|: + * if |x|/2 is subnormal, it must hold because y is + * normal; if |x|/2 is normal, then |x|/2 >= s, so + * since |x| - |y| < s, + * + * |x|/2 = |x| - |x|/2 <= |x| - s <= |y|; + * + * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz + * lemma, round(x - y) = x - y. QED. + * + * Proof of theorem. WLOG, assume 0 <= a <= b. Since + * round(a + round(round(b - a)*t) is nondecreasing in + * t and attains a at 0, the lower end of the bound is + * trivial; we must show the upper end of the bound + * strictly. It suffices to show this for the largest + * floating-point number below 1, namely 1 - ulp(1)/2. + * + * Case I: round(b - a) is normal. Then it is at most + * the smallest floating-point number above b - a. By + * Lemma 1, round(round(b - a)*t) < round(b - a). + * Since the inequality is strict, and since + * round(round(b - a)*t) is a floating-point number + * below round(b - a), and since there are no + * floating-point numbers between b - a and round(b - + * a), we must have round(round(b - a)*t) < b - a. + * Then since y |---> round(a + y) is nondecreasing, we + * must have + * + * round(a + round(round(b - a)*t)) + * <= round(a + (b - a)) + * = round(b) = b. + * + * Case II: round(b - a) is subnormal. In this case, + * Lemma 1 falls apart -- we are not guaranteed the + * strict inequality. However, by Lemma 3, the + * difference is exact: round(b - a) = b - a. Thus, + * + * round(a + round(round(b - a)*t)) + * <= round(a + round((b - a)*t)) + * <= round(a + (b - a)) + * = round(b) + * = b, + * + * QED. + */ + + /* p0 is restricted to [0,1], but we use >= to silence -Wfloat-equal. 
*/ + if (p0 >= 1) + return b; + return a + (b - a)*p0; + } +} + +/** + * Deterministically sample from the standard logistic distribution, + * indexed by a uniform random 32-bit integer s and uniform random + * floating-point numbers t and p0 in (0, 1]. + */ +STATIC double +sample_logistic(uint32_t s, double t, double p0) +{ + double sign = (s & 1) ? -1 : +1; + double r; + + /* + * We carve up the interval (0, 1) into subregions to compute + * the inverse CDF precisely: + * + * A = (0, 1/(1 + e)] ---> (-\infty, -1] + * B = [1/(1 + e), 1/2] ---> [-1, 0] + * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1] + * D = [1 - 1/(1 + e), 1) ---> [1, +\infty) + * + * Cases D and C are mirror images of cases A and B, + * respectively, so we choose between them by the sign chosen + * by a fair coin toss. We choose between cases A and B by a + * coin toss weighted by + * + * 2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2): + * + * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)] + * sample p; if it comes up tails, scale p0 into a uniform (0, + * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p = + * 1/2 - p0. + */ + if (t <= 2/(1 + exp(1))) { + /* p uniform in (0, 1/(1 + e)], represented by p0. */ + p0 /= 1 + exp(1); + r = logit(p0); + } else { + /* + * p uniform in [1/(1 + e), 1/2), actually represented + * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so + * that p = 1/2 - p0. + */ + p0 *= 0.5 - 1/(1 + exp(1)); + r = logithalf(p0); + } + + /* + * We have chosen from the negative half of the standard + * logistic distribution, which is symmetric with the positive + * half. Now use the sign to choose uniformly between them. + */ + return sign*r; +} + +/** + * Deterministically sample from the logistic distribution scaled by + * sigma and translated by mu. 
+ */ +static double +sample_logistic_locscale(uint32_t s, double t, double p0, double mu, + double sigma) +{ + + return mu + sigma*sample_logistic(s, t, p0); +} + +/** + * Deterministically sample from the standard log-logistic + * distribution, indexed by a uniform random 32-bit integer s and a + * uniform random floating-point number p0 in (0, 1]. + */ +STATIC double +sample_log_logistic(uint32_t s, double p0) +{ + + /* + * Carve up the interval (0, 1) into (0, 1/2] and [1/2, 1); the + * condition numbers of the icdf and the isf coincide at 1/2. + */ + p0 *= 0.5; + if ((s & 1) == 0) { + /* p = p0 in (0, 1/2] */ + return p0/(1 - p0); + } else { + /* p = 1 - p0 in [1/2, 1) */ + return (1 - p0)/p0; + } +} + +/** + * Deterministically sample from the log-logistic distribution with + * scale alpha and shape beta. + */ +static double +sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha, + double beta) +{ + double x = sample_log_logistic(s, p0); + + return alpha*pow(x, 1/beta); +} + +/** + * Deterministically sample from the standard exponential distribution, + * indexed by a uniform random 32-bit integer s and a uniform random + * floating-point number p0 in (0, 1]. + */ +static double +sample_exponential(uint32_t s, double p0) +{ + /* + * We would like to evaluate log(p) for p near 0, and log1p(-p) + * for p near 1. Simply carve the interval into (0, 1/2] and + * [1/2, 1) by a fair coin toss. + */ + p0 *= 0.5; + if ((s & 1) == 0) + /* p = p0 in (0, 1/2] */ + return -log(p0); + else + /* p = 1 - p0 in [1/2, 1) */ + return -log1p(-p0); +} + +/** + * Deterministically sample from a Weibull distribution with scale + * lambda and shape k -- just an exponential with a shape parameter in + * addition to a scale parameter. (Yes, lambda really is the scale, + * _not_ the rate.) 
+ */ +STATIC double +sample_weibull(uint32_t s, double p0, double lambda, double k) +{ + + return lambda*pow(sample_exponential(s, p0), 1/k); +} + +/** + * Deterministically sample from the generalized Pareto distribution + * with shape xi, indexed by a uniform random 32-bit integer s and a + * uniform random floating-point number p0 in (0, 1]. + */ +STATIC double +sample_genpareto(uint32_t s, double p0, double xi) +{ + double x = sample_exponential(s, p0); + + /* + * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the + * absolutely convergent Taylor series + * + * f(xi) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!) + * = x + (1/xi) \sum_{n=2}^\infty (xi x)^n/n! + * = x + \sum_{n=2}^\infty xi^{n-1} x^n/n! + * = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n! + * = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!). + * + * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error + * of f(xi) from x. If |xi| < eps/4x, then + * + * |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n! + * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n! + * <= \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2, + * + * for any 0 < eps < 2. Hence, as long as |xi| < eps/2x, f(xi) + * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'| + * <= eps. What bound should we use for x? + * + * - If x is exponentially distributed, x > 200 with + * probability below e^{-200} << 2^{-256}, i.e. never. + * + * - If x is computed by -log(U) for U in (0, 1], x is + * guaranteed to be below 1000 in IEEE 754 binary64 + * floating-point. + * + * We can safely cut xi off at 1e-20 < eps/4000 and attain an + * error bounded by 0.5 ulp for this expression. + */ + return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi); +} + +/** + * Deterministically sample from a generalized Pareto distribution with + * shape xi, scaled by sigma and translated by mu. 
+ */ +static double +sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma, + double xi) +{ + + return mu + sigma*sample_genpareto(s, p0, xi); +} + +/** + * Deterministically sample from the geometric distribution with + * per-trial success probability p. + * + * XXX Quantify the error (KL divergence?) of this + * ceiling-of-exponential sampler from a true geometric distribution, + * which we could get by rejection sampling. Relevant papers: + * + * John F. Monahan, `Accuracy in Random Number Generation', + * Mathematics of Computation 45(172), October 1984, pp. 559--568. +*https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf + * + * Karl Bringmann and Tobias Friedrich, `Exact and Efficient + * Generation of Geometric Random Variates and Random Graphs', in + * Proceedings of the 40th International Colloquium on Automata, + * Languages, and Programming -- ICALP 2013, Springer LNCS 7965, + * pp. 267--278. + * https://doi.org/10.1007/978-3-642-39206-1_23 + * https://people.mpi-inf.mpg.de/~kbringma/paper/2013ICALP-1.pdf + */ +static double +sample_geometric(uint32_t s, double p0, double p) +{ + double x = sample_exponential(s, p0); + + /* This is actually a check against 1, but we do >= so that the compiler + does not raise a -Wfloat-equal */ + if (p >= 1) + return 1; + + return ceil(-x/log1p(-p)); +} + +/*******************************************************************/ + +/** Public API for probability distributions: + * + * For each probability distribution we define each public function + * (sample/cdf/sf/icdf/isf) as part of its dist_ops structure. 
+ */ + +const char * +dist_name(const struct dist *dist) +{ + return dist->ops->name; +} + +double +dist_sample(const struct dist *dist) +{ + return dist->ops->sample(dist); +} + +double +dist_cdf(const struct dist *dist, double x) +{ + return dist->ops->cdf(dist, x); +} + +double +dist_sf(const struct dist *dist, double x) +{ + return dist->ops->sf(dist, x); +} + +double +dist_icdf(const struct dist *dist, double p) +{ + return dist->ops->icdf(dist, p); +} + +double +dist_isf(const struct dist *dist, double p) +{ + return dist->ops->isf(dist, p); +} + +/** Functions for uniform distribution */ + +static double +uniform_sample(const struct dist *dist) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double p0 = random_uniform_01(); + + return sample_uniform_interval(p0, U->a, U->b); +} + +static double +uniform_cdf(const struct dist *dist, double x) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + + if (x < U->a) + return 0; + else if (x < U->b) + return (x - U->a)/(U->b - U->a); + else + return 1; +} + +static double +uniform_sf(const struct dist *dist, double x) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + + if (x > U->b) + return 0; + else if (x > U->a) + return (U->b - x)/(U->b - U->a); + else + return 1; +} + +static double +uniform_icdf(const struct dist *dist, double p) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double w = U->b - U->a; + + return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p))); +} + +static double +uniform_isf(const struct dist *dist, double p) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double w = U->b - U->a; + + return (p < 0.5 ? 
(U->b - w*p) : (U->a + w*(1 - p))); +} + +const struct dist_ops uniform_ops = { + .name = "uniform", + .sample = uniform_sample, + .cdf = uniform_cdf, + .sf = uniform_sf, + .icdf = uniform_icdf, + .isf = uniform_isf, +}; + +/** Functions for logistic distribution: */ + +static double +logistic_sample(const struct dist *dist) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + uint32_t s = crypto_rand_u32(); + double t = random_uniform_01(); + double p0 = random_uniform_01(); + + return sample_logistic_locscale(s, t, p0, L->mu, L->sigma); +} + +static double +logistic_cdf(const struct dist *dist, double x) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return cdf_logistic(x, L->mu, L->sigma); +} + +static double +logistic_sf(const struct dist *dist, double x) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return sf_logistic(x, L->mu, L->sigma); +} + +static double +logistic_icdf(const struct dist *dist, double p) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return icdf_logistic(p, L->mu, L->sigma); +} + +static double +logistic_isf(const struct dist *dist, double p) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return isf_logistic(p, L->mu, L->sigma); +} + +const struct dist_ops logistic_ops = { + .name = "logistic", + .sample = logistic_sample, + .cdf = logistic_cdf, + .sf = logistic_sf, + .icdf = logistic_icdf, + .isf = logistic_isf, +}; + +/** Functions for log-logistic distribution: */ + +static double +log_logistic_sample(const struct dist *dist) +{ + const struct log_logistic *LL = const_container_of(dist, struct + log_logistic, base); + uint32_t s = crypto_rand_u32(); + double p0 = random_uniform_01(); + + return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta); +} + +static double +log_logistic_cdf(const struct dist *dist, double x) +{ + const struct 
log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return cdf_log_logistic(x, LL->alpha, LL->beta); +} + +static double +log_logistic_sf(const struct dist *dist, double x) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return sf_log_logistic(x, LL->alpha, LL->beta); +} + +static double +log_logistic_icdf(const struct dist *dist, double p) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return icdf_log_logistic(p, LL->alpha, LL->beta); +} + +static double +log_logistic_isf(const struct dist *dist, double p) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return isf_log_logistic(p, LL->alpha, LL->beta); +} + +const struct dist_ops log_logistic_ops = { + .name = "log logistic", + .sample = log_logistic_sample, + .cdf = log_logistic_cdf, + .sf = log_logistic_sf, + .icdf = log_logistic_icdf, + .isf = log_logistic_isf, +}; + +/** Functions for Weibull distribution */ + +static double +weibull_sample(const struct dist *dist) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + uint32_t s = crypto_rand_u32(); + double p0 = random_uniform_01(); + + return sample_weibull(s, p0, W->lambda, W->k); +} + +static double +weibull_cdf(const struct dist *dist, double x) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return cdf_weibull(x, W->lambda, W->k); +} + +static double +weibull_sf(const struct dist *dist, double x) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return sf_weibull(x, W->lambda, W->k); +} + +static double +weibull_icdf(const struct dist *dist, double p) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return icdf_weibull(p, W->lambda, W->k); +} + +static double +weibull_isf(const struct dist *dist, double p) +{ + const struct weibull *W = 
const_container_of(dist, struct weibull, + base); + + return isf_weibull(p, W->lambda, W->k); +} + +const struct dist_ops weibull_ops = { + .name = "Weibull", + .sample = weibull_sample, + .cdf = weibull_cdf, + .sf = weibull_sf, + .icdf = weibull_icdf, + .isf = weibull_isf, +}; + +/** Functions for generalized Pareto distributions */ + +static double +genpareto_sample(const struct dist *dist) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + uint32_t s = crypto_rand_u32(); + double p0 = random_uniform_01(); + + return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi); +} + +static double +genpareto_cdf(const struct dist *dist, double x) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi); +} + +static double +genpareto_sf(const struct dist *dist, double x) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return sf_genpareto(x, GP->mu, GP->sigma, GP->xi); +} + +static double +genpareto_icdf(const struct dist *dist, double p) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi); +} + +static double +genpareto_isf(const struct dist *dist, double p) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return isf_genpareto(p, GP->mu, GP->sigma, GP->xi); +} + +const struct dist_ops genpareto_ops = { + .name = "generalized Pareto", + .sample = genpareto_sample, + .cdf = genpareto_cdf, + .sf = genpareto_sf, + .icdf = genpareto_icdf, + .isf = genpareto_isf, +}; + +/** Functions for geometric distribution on number of trials before success */ + +static double +geometric_sample(const struct dist *dist) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + uint32_t s = crypto_rand_u32(); + double p0 = random_uniform_01(); + + return 
sample_geometric(s, p0, G->p); +} + +static double +geometric_cdf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 0; + /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */ + return -expm1(floor(x)*log1p(-G->p)); +} + +static double +geometric_sf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 1; + /* (1 - p)^floor(x) = e^{floor(x) log(1 - p)}; SF is 1 below the support */ + return exp(floor(x)*log1p(-G->p)); +} + +static double +geometric_icdf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log1p(-p)/log1p(-G->p); +} + +static double +geometric_isf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log(p)/log1p(-G->p); +} + +const struct dist_ops geometric_ops = { + .name = "geometric (1-based)", + .sample = geometric_sample, + .cdf = geometric_cdf, + .sf = geometric_sf, + .icdf = geometric_icdf, + .isf = geometric_isf, +}; diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h new file mode 100644 index 0000000000..66acb796fd --- /dev/null +++ b/src/lib/math/prob_distr.h @@ -0,0 +1,158 @@ + +/** + * \file prob_distr.h + * + * \brief Header for prob_distr.c + **/ + +#ifndef TOR_PROB_DISTR_H +#define TOR_PROB_DISTR_H + +#include "lib/cc/compat_compiler.h" +#include "lib/cc/torint.h" +#include "lib/testsupport/testsupport.h" + +/** + * Container for distribution parameters for sampling, CDF, &c. 
+ */ +struct dist { + const struct dist_ops *ops; +}; + +#define DIST_BASE(OPS) { .ops = (OPS) } +#define DIST_BASE_TYPED(OPS, OBJ, TYPE) \ + DIST_BASE((OPS) + 0*sizeof(&(OBJ) - (const TYPE *)&(OBJ))) + +const char *dist_name(const struct dist *); +double dist_sample(const struct dist *); +double dist_cdf(const struct dist *, double x); +double dist_sf(const struct dist *, double x); +double dist_icdf(const struct dist *, double p); +double dist_isf(const struct dist *, double p); + +struct dist_ops { + const char *name; + double (*sample)(const struct dist *); + double (*cdf)(const struct dist *, double x); + double (*sf)(const struct dist *, double x); + double (*icdf)(const struct dist *, double p); + double (*isf)(const struct dist *, double p); +}; + +/* Geometric distribution on positive number of trials before first success */ + +struct geometric { + struct dist base; + double p; /* success probability */ +}; + +extern const struct dist_ops geometric_ops; + +#define GEOMETRIC(OBJ) \ + DIST_BASE_TYPED(&geometric_ops, OBJ, struct geometric) + +/* Pareto distribution */ + +struct genpareto { + struct dist base; + double mu; + double sigma; + double xi; +}; + +extern const struct dist_ops genpareto_ops; + +#define GENPARETO(OBJ) \ + DIST_BASE_TYPED(&genpareto_ops, OBJ, struct genpareto) + +/* Weibull distribution */ + +struct weibull { + struct dist base; + double lambda; + double k; +}; + +extern const struct dist_ops weibull_ops; + +#define WEIBULL(OBJ) \ + DIST_BASE_TYPED(&weibull_ops, OBJ, struct weibull) + +/* Log-logistic distribution */ + +struct log_logistic { + struct dist base; + double alpha; + double beta; +}; + +extern const struct dist_ops log_logistic_ops; + +#define LOG_LOGISTIC(OBJ) \ + DIST_BASE_TYPED(&log_logistic_ops, OBJ, struct log_logistic) + +/* Logistic distribution */ + +struct logistic { + struct dist base; + double mu; + double sigma; +}; + +extern const struct dist_ops logistic_ops; + +#define LOGISTIC(OBJ) \ + 
DIST_BASE_TYPED(&logistic_ops, OBJ, struct logistic) + +/* Uniform distribution */ + +struct uniform { + struct dist base; + double a; + double b; +}; + +extern const struct dist_ops uniform_ops; + +#define UNIFORM(OBJ) \ + DIST_BASE_TYPED(&uniform_ops, OBJ, struct uniform) + +/** Only by unittests */ + +#ifdef PROB_DISTR_PRIVATE + +STATIC double logithalf(double p0); +STATIC double logit(double p); + +STATIC double random_uniform_01(void); + +STATIC double logistic(double x); +STATIC double cdf_logistic(double x, double mu, double sigma); +STATIC double sf_logistic(double x, double mu, double sigma); +STATIC double icdf_logistic(double p, double mu, double sigma); +STATIC double isf_logistic(double p, double mu, double sigma); +STATIC double sample_logistic(uint32_t s, double t, double p0); + +STATIC double cdf_log_logistic(double x, double alpha, double beta); +STATIC double sf_log_logistic(double x, double alpha, double beta); +STATIC double icdf_log_logistic(double p, double alpha, double beta); +STATIC double isf_log_logistic(double p, double alpha, double beta); +STATIC double sample_log_logistic(uint32_t s, double p0); + +STATIC double cdf_weibull(double x, double lambda, double k); +STATIC double sf_weibull(double x, double lambda, double k); +STATIC double icdf_weibull(double p, double lambda, double k); +STATIC double isf_weibull(double p, double lambda, double k); +STATIC double sample_weibull(uint32_t s, double p0, double lambda, double k); + +STATIC double sample_uniform_interval(double p0, double a, double b); + +STATIC double cdf_genpareto(double x, double mu, double sigma, double xi); +STATIC double sf_genpareto(double x, double mu, double sigma, double xi); +STATIC double icdf_genpareto(double p, double mu, double sigma, double xi); +STATIC double isf_genpareto(double p, double mu, double sigma, double xi); +STATIC double sample_genpareto(uint32_t s, double p0, double xi); + +#endif + +#endif diff --git a/src/lib/smartlist_core/smartlist_foreach.h 
b/src/lib/smartlist_core/smartlist_foreach.h index 54f08ac47d..c9afebd6a2 100644 --- a/src/lib/smartlist_core/smartlist_foreach.h +++ b/src/lib/smartlist_core/smartlist_foreach.h @@ -83,6 +83,19 @@ ++var ## _sl_idx) { \ var = (sl)->list[var ## _sl_idx]; +/** Iterates over the items in smartlist <b>sl</b> in reverse order, similar to + * SMARTLIST_FOREACH_BEGIN + * + * NOTE: This macro is incompatible with SMARTLIST_DEL_CURRENT. + */ +#define SMARTLIST_FOREACH_REVERSE_BEGIN(sl, type, var) \ + STMT_BEGIN \ + int var ## _sl_idx, var ## _sl_len=(sl)->num_used; \ + type var; \ + for (var ## _sl_idx = var ## _sl_len-1; var ## _sl_idx >= 0; \ + --var ## _sl_idx) { \ + var = (sl)->list[var ## _sl_idx]; + #define SMARTLIST_FOREACH_END(var) \ var = NULL; \ (void) var ## _sl_idx; \ diff --git a/src/lib/time/.may_include b/src/lib/time/.may_include index 40a18805ac..ae01431b60 100644 --- a/src/lib/time/.may_include +++ b/src/lib/time/.may_include @@ -7,6 +7,7 @@ lib/log/*.h lib/subsys/*.h lib/time/*.h lib/wallclock/*.h +lib/defs/time.h # For load_windows_system_lib. lib/fs/winlib.h
\ No newline at end of file diff --git a/src/lib/time/compat_time.c b/src/lib/time/compat_time.c index f1ddb4fdc4..387b0fad22 100644 --- a/src/lib/time/compat_time.c +++ b/src/lib/time/compat_time.c @@ -787,8 +787,8 @@ monotime_absolute_nsec(void) return monotime_diff_nsec(&initialized_at, &now); } -uint64_t -monotime_absolute_usec(void) +MOCK_IMPL(uint64_t, +monotime_absolute_usec,(void)) { return monotime_absolute_nsec() / 1000; } diff --git a/src/lib/time/compat_time.h b/src/lib/time/compat_time.h index c5337e9998..3c8797c450 100644 --- a/src/lib/time/compat_time.h +++ b/src/lib/time/compat_time.h @@ -199,7 +199,7 @@ uint64_t monotime_absolute_nsec(void); /** * Return the number of microseconds since the timer system was initialized. */ -uint64_t monotime_absolute_usec(void); +MOCK_DECL(uint64_t, monotime_absolute_usec,(void)); /** * Return the number of milliseconds since the timer system was initialized. */ diff --git a/src/lib/time/tvdiff.c b/src/lib/time/tvdiff.c index bc8a1166e7..9dfb63c26f 100644 --- a/src/lib/time/tvdiff.c +++ b/src/lib/time/tvdiff.c @@ -11,6 +11,7 @@ #include "lib/time/tvdiff.h" #include "lib/cc/compat_compiler.h" +#include "lib/defs/time.h" #include "lib/log/log.h" #ifdef _WIN32 @@ -20,8 +21,6 @@ #include <sys/time.h> #endif -#define TOR_USEC_PER_SEC 1000000 - /** Return the difference between start->tv_sec and end->tv_sec. * Returns INT64_MAX on overflow and underflow. 
*/ diff --git a/src/rust/protover/protover.rs b/src/rust/protover/protover.rs index 8624afeafa..0b2a78c210 100644 --- a/src/rust/protover/protover.rs +++ b/src/rust/protover/protover.rs @@ -46,6 +46,7 @@ pub enum Protocol { LinkAuth, Microdesc, Relay, + Padding, } impl fmt::Display for Protocol { @@ -73,6 +74,7 @@ impl FromStr for Protocol { "LinkAuth" => Ok(Protocol::LinkAuth), "Microdesc" => Ok(Protocol::Microdesc), "Relay" => Ok(Protocol::Relay), + "Padding" => Ok(Protocol::Padding), _ => Err(ProtoverError::UnknownProtocol), } } @@ -163,7 +165,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr { Link=1-5 \ LinkAuth=3 \ Microdesc=1-2 \ - Relay=1-2" + Relay=1-2 \ + Padding=1" ) } else { cstr!( @@ -176,7 +179,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr { Link=1-5 \ LinkAuth=1,3 \ Microdesc=1-2 \ - Relay=1-2" + Relay=1-2 \ + Padding=1" ) } } diff --git a/src/test/Makefile.nmake b/src/test/Makefile.nmake index aa16a22b52..ca6a84cf8a 100644 --- a/src/test/Makefile.nmake +++ b/src/test/Makefile.nmake @@ -19,6 +19,7 @@ TEST_OBJECTS = test.obj test_addr.obj test_channel.obj test_channeltls.obj \ test_cell_formats.obj test_relay.obj test_replay.obj \ test_channelpadding.obj \ test_circuitstats.obj \ + test_circuitpadding.obj \ test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj tinytest.obj: ..\ext\tinytest.c diff --git a/src/test/include.am b/src/test/include.am index 4725e8cbaa..b276500fd5 100644 --- a/src/test/include.am +++ b/src/test/include.am @@ -101,6 +101,7 @@ src_test_test_SOURCES += \ src/test/test_cell_queue.c \ src/test/test_channel.c \ src/test/test_channelpadding.c \ + src/test/test_circuitpadding.c \ src/test/test_channeltls.c \ src/test/test_checkdir.c \ src/test/test_circuitlist.c \ @@ -156,6 +157,7 @@ src_test_test_SOURCES += \ src/test/test_periodic_event.c \ src/test/test_policy.c \ src/test/test_process.c \ + src/test/test_prob_distr.c \ src/test/test_procmon.c \ src/test/test_proto_http.c \ 
src/test/test_proto_misc.c \ @@ -206,6 +208,7 @@ src_test_test_slow_SOURCES += \ src/test/test_slow.c \ src/test/test_crypto_slow.c \ src/test/test_process_slow.c \ + src/test/test_prob_distr.c \ src/test/testing_common.c \ src/test/testing_rsakeys.c \ src/ext/tinytest.c diff --git a/src/test/prob_distr_mpfr_ref.c b/src/test/prob_distr_mpfr_ref.c new file mode 100644 index 0000000000..4e64d731cd --- /dev/null +++ b/src/test/prob_distr_mpfr_ref.c @@ -0,0 +1,64 @@ +/* Copyright 2012-2018, The Tor Project, Inc + * See LICENSE for licensing information */ + +/** prob_distr_mpfr_ref.c + * + * Example reference file for GNU MPFR vectors tested in test_prob_distr.c . + * Code by Riastradh. + */ + +#include <complex.h> +#include <float.h> +#include <math.h> +#include <stdio.h> + +/* Must come after <stdio.h> so we get mpfr_printf. */ +#include <mpfr.h> + +/* gcc -o mpfr prob_distr_mpfr_ref.c -lmpfr -lm */ + +/* Computes logit(p) for p = .49999 */ +int +main(void) +{ + mpfr_t p, q, r; + mpfr_init(p); + mpfr_set_prec(p, 200); + mpfr_init(q); + mpfr_set_prec(q, 200); + mpfr_init(r); + mpfr_set_prec(r, 200); + mpfr_set_d(p, .49999, MPFR_RNDN); + mpfr_set_d(q, 1, MPFR_RNDN); + /* r := q - p = 1 - p */ + mpfr_sub(r, q, p, MPFR_RNDN); + /* q := p/r = p/(1 - p) */ + mpfr_div(q, p, r, MPFR_RNDN); + /* r := log(q) = log(p/(1 - p)) */ + mpfr_log(r, q, MPFR_RNDN); + mpfr_printf("mpfr 200-bit\t%.128Rg\n", r); + + /* + * Print a double approximation to logit three different ways. All + * three agree bit for bit on the libms I tried, with the nextafter + * adjustment (which is well within the 10 eps relative error bound + * advertised). Apparently I must have used the Goldberg expression + * for what I wrote down in the test case. 
+ */ + printf("mpfr 53-bit\t%.17g\n", nextafter(mpfr_get_d(r, MPFR_RNDN), 0), 0); + volatile double p0 = .49999; + printf("log1p\t\t%.17g\n", nextafter(-log1p((1 - 2*p0)/p0), 0)); + volatile double x = (1 - 2*p0)/p0; + volatile double xp1 = x + 1; + printf("Goldberg\t%.17g\n", -x*log(xp1)/(xp1 - 1)); + + /* + * Print a bad approximation, using the naive expression, to see a + * lot of wrong digits, far beyond the 10 eps relative error attained + * by -log1p((1 - 2*p)/p). + */ + printf("naive\t\t%.17g\n", log(p0/(1 - p0))); + + fflush(stdout); + return ferror(stdout); +} diff --git a/src/test/test.c b/src/test/test.c index 13e8c71709..902565dfbe 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -845,6 +845,7 @@ struct testgroup_t testgroups[] = { { "channeltls/", channeltls_tests }, { "checkdir/", checkdir_tests }, { "circuitbuild/", circuitbuild_tests }, + { "circuitpadding/", circuitpadding_tests }, { "circuitlist/", circuitlist_tests }, { "circuitmux/", circuitmux_tests }, { "circuitstats/", circuitstats_tests }, @@ -900,6 +901,7 @@ struct testgroup_t testgroups[] = { { "parsecommon/", parsecommon_tests }, { "periodic-event/" , periodic_event_tests }, { "policy/" , policy_tests }, + { "prob_distr/", prob_distr_tests }, { "procmon/", procmon_tests }, { "process/", process_tests }, { "proto/http/", proto_http_tests }, diff --git a/src/test/test.h b/src/test/test.h index 9f754469c8..39953e9f7e 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -187,6 +187,7 @@ extern struct testcase_t cell_format_tests[]; extern struct testcase_t cell_queue_tests[]; extern struct testcase_t channel_tests[]; extern struct testcase_t channelpadding_tests[]; +extern struct testcase_t circuitpadding_tests[]; extern struct testcase_t channeltls_tests[]; extern struct testcase_t checkdir_tests[]; extern struct testcase_t circuitbuild_tests[]; @@ -242,6 +243,8 @@ extern struct testcase_t parsecommon_tests[]; extern struct testcase_t pem_tests[]; extern struct testcase_t 
periodic_event_tests[]; extern struct testcase_t policy_tests[]; +extern struct testcase_t prob_distr_tests[]; +extern struct testcase_t slow_stochastic_prob_distr_tests[]; extern struct testcase_t procmon_tests[]; extern struct testcase_t process_tests[]; extern struct testcase_t proto_http_tests[]; diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c new file mode 100644 index 0000000000..f4d003969e --- /dev/null +++ b/src/test/test_circuitpadding.c @@ -0,0 +1,2356 @@ +#define TOR_CHANNEL_INTERNAL_ +#define TOR_TIMERS_PRIVATE +#define CIRCUITPADDING_PRIVATE +#define NETWORKSTATUS_PRIVATE + +#include "core/or/or.h" +#include "test.h" +#include "lib/testsupport/testsupport.h" +#include "core/or/connection_or.h" +#include "core/or/channel.h" +#include "core/or/channeltls.h" +#include <event.h> +#include "lib/evloop/compat_libevent.h" +#include "lib/time/compat_time.h" +#include "lib/defs/time.h" +#include "core/or/relay.h" +#include "core/or/circuitlist.h" +#include "core/or/circuitbuild.h" +#include "core/or/circuitpadding.h" +#include "core/crypto/relay_crypto.h" +#include "core/or/protover.h" +#include "feature/nodelist/nodelist.h" +#include "lib/evloop/compat_libevent.h" +#include "app/config/config.h" + +#include "feature/nodelist/routerstatus_st.h" +#include "feature/nodelist/networkstatus_st.h" +#include "feature/nodelist/node_st.h" +#include "core/or/cell_st.h" +#include "core/or/crypt_path_st.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" + +extern smartlist_t *connection_array; + +circid_t get_unique_circ_id_by_chan(channel_t *chan); +void helper_create_basic_machine(void); +static void helper_create_conditional_machines(void); + +static or_circuit_t * new_fake_orcirc(channel_t *nchan, channel_t *pchan); +channel_t *new_fake_channel(void); +void test_circuitpadding_negotiation(void *arg); +void test_circuitpadding_wronghop(void *arg); +void test_circuitpadding_conditions(void *arg); + +void 
test_circuitpadding_serialize(void *arg); +void test_circuitpadding_rtt(void *arg); +void test_circuitpadding_tokens(void *arg); +void test_circuitpadding_circuitsetup_machine(void *arg); + +static void +simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, + int padding); +void free_fake_orcirc(circuit_t *circ); +void free_fake_origin_circuit(origin_circuit_t *circ); + +static int deliver_negotiated = 1; +static int64_t curr_mocked_time; + +static node_t padding_node; +static node_t non_padding_node; + +static channel_t dummy_channel; +static circpad_machine_spec_t circ_client_machine; + +static void +timers_advance_and_run(int64_t msec_update) +{ + curr_mocked_time += msec_update*TOR_NSEC_PER_MSEC; + monotime_coarse_set_mock_time_nsec(curr_mocked_time); + monotime_set_mock_time_nsec(curr_mocked_time); + timers_run_pending(); +} + +static void +nodes_init(void) +{ + padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t)); + padding_node.rs->pv.supports_padding = 1; + + non_padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t)); + non_padding_node.rs->pv.supports_padding = 0; +} + +static void +nodes_free(void) +{ + tor_free(padding_node.rs); + + tor_free(non_padding_node.rs); +} + +static const node_t * +node_get_by_id_mock(const char *identity_digest) +{ + if (identity_digest[0] == 1) { + return &padding_node; + } else if (identity_digest[0] == 0) { + return &non_padding_node; + } + + return NULL; +} + +static or_circuit_t * +new_fake_orcirc(channel_t *nchan, channel_t *pchan) +{ + or_circuit_t *orcirc = NULL; + circuit_t *circ = NULL; + crypt_path_t tmp_cpath; + char whatevs_key[CPATH_KEY_MATERIAL_LEN]; + + orcirc = tor_malloc_zero(sizeof(*orcirc)); + circ = &(orcirc->base_); + circ->magic = OR_CIRCUIT_MAGIC; + + //circ->n_chan = nchan; + circ->n_circ_id = get_unique_circ_id_by_chan(nchan); + circ->n_mux = NULL; /* ?? 
*/ + cell_queue_init(&(circ->n_chan_cells)); + circ->n_hop = NULL; + circ->streams_blocked_on_n_chan = 0; + circ->streams_blocked_on_p_chan = 0; + circ->n_delete_pending = 0; + circ->p_delete_pending = 0; + circ->received_destroy = 0; + circ->state = CIRCUIT_STATE_OPEN; + circ->purpose = CIRCUIT_PURPOSE_OR; + circ->package_window = CIRCWINDOW_START_MAX; + circ->deliver_window = CIRCWINDOW_START_MAX; + circ->n_chan_create_cell = NULL; + + //orcirc->p_chan = pchan; + orcirc->p_circ_id = get_unique_circ_id_by_chan(pchan); + cell_queue_init(&(orcirc->p_chan_cells)); + + circuit_set_p_circid_chan(orcirc, orcirc->p_circ_id, pchan); + circuit_set_n_circid_chan(circ, circ->n_circ_id, nchan); + + memset(&tmp_cpath, 0, sizeof(tmp_cpath)); + if (circuit_init_cpath_crypto(&tmp_cpath, whatevs_key, + sizeof(whatevs_key), 0, 0)<0) { + log_warn(LD_BUG,"Circuit initialization failed"); + return NULL; + } + orcirc->crypto = tmp_cpath.crypto; + + return orcirc; +} + +void +free_fake_orcirc(circuit_t *circ) +{ + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + + relay_crypto_clear(&orcirc->crypto); + + circpad_circuit_free_all_machineinfos(circ); + tor_free(circ); +} + +void +free_fake_origin_circuit(origin_circuit_t *circ) +{ + circpad_circuit_free_all_machineinfos(TO_CIRCUIT(circ)); + circuit_clear_cpath(circ); + tor_free(circ); +} + +void dummy_nop_timer(void); + +//static int dont_stop_libevent = 0; + +static circuit_t *client_side; +static circuit_t *relay_side; + +static int n_client_cells = 0; +static int n_relay_cells = 0; + +static int +circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno); + +static void +circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ, + cell_direction_t direction); + +static void +circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ, + cell_direction_t direction) +{ + (void)cmux; + (void)circ; + 
(void)direction; + + return; +} + +static int +circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno) +{ + (void)cell; (void)on_stream; (void)filename; (void)lineno; + + if (circ == client_side) { + if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) { + // Deliver to relay + circpad_handle_padding_negotiate(relay_side, cell); + } else { + + int is_target_hop = circpad_padding_is_from_expected_hop(circ, + layer_hint); + tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_OUT); + tt_int_op(is_target_hop, OP_EQ, 1); + + // No need to pretend a padding cell was sent: This event is + // now emitted internally when the circuitpadding code sends them. + //circpad_cell_event_padding_sent(client_side); + + // Receive padding cell at middle + circpad_deliver_recognized_relay_cell_events(relay_side, + cell->payload[0], NULL); + } + n_client_cells++; + } else if (circ == relay_side) { + tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_IN); + + if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATED) { + // XXX: blah need right layer_hint.. + if (deliver_negotiated) + circpad_handle_padding_negotiated(client_side, cell, + TO_ORIGIN_CIRCUIT(client_side) + ->cpath->next); + } else if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) { + circpad_handle_padding_negotiate(client_side, cell); + } else { + // No need to pretend a padding cell was sent: This event is + // now emitted internally when the circuitpadding code sends them. 
+ //circpad_cell_event_padding_sent(relay_side); + + // Receive padding cell at client + circpad_deliver_recognized_relay_cell_events(client_side, + cell->payload[0], + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + } + + n_relay_cells++; + } + + done: + timers_advance_and_run(1); + return 0; +} + +// Test reading and writing padding to strings (or options_t + consensus) +void +test_circuitpadding_serialize(void *arg) +{ + (void)arg; +} + +static signed_error_t +circpad_send_command_to_hop_mock(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len) +{ + (void) circ; + (void) hopnum; + (void) relay_command; + (void) payload; + (void) payload_len; + return 0; +} + +void +test_circuitpadding_rtt(void *arg) +{ + /* Test Plan: + * + * 1. Test RTT measurement server side + * a. test usage of measured RTT + * 2. Test termination of RTT measurement + * a. test non-update of RTT + * 3. Test client side circuit and non-application of RTT.. 
+ */ + circpad_delay_t rtt_estimate; + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock); + + dummy_channel.cmux = circuitmux_alloc(); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + helper_create_basic_machine(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side, + 0); + + relay_side->padding_machine[0] = &circ_client_machine; + relay_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,0); + + /* Test 1: Test measuring RTT */ + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0); + + timers_advance_and_run(20); + + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 19000); + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 30000); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + 
tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0); + timers_advance_and_run(20); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 20000); + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 21000); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + /* Test 2: Termination of RTT measurement (from the previous test) */ + tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1); + rtt_estimate = relay_side->padding_info[0]->rtt_estimate_usec; + + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + timers_advance_and_run(4); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_EQ, + rtt_estimate); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + /* Test 3: Make sure client side machine properly ignores RTT */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + timers_advance_and_run(20); + circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(client_side->padding_info[0]->rtt_estimate_usec, OP_EQ, 0); + 
tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0), + OP_NE, client_side->padding_info[0]->rtt_estimate_usec); + tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0), + OP_EQ, + circpad_machine_current_state( + client_side->padding_info[0])->start_usec); + done: + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + timers_shutdown(); + monotime_disable_test_mocking(); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + tor_free(circ_client_machine.states); + + return; +} + +void +helper_create_basic_machine(void) +{ + /* Start, burst */ + circpad_machine_states_init(&circ_client_machine, 2); + + circ_client_machine.states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL; + + // FIXME: Is this what we want? 
+ circ_client_machine.states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_HIGHER; + + // FIXME: Tune this histogram + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_len = 5; + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 500; + circ_client_machine.states[CIRCPAD_STATE_BURST].range_usec = 1000000; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[0] = 1; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[1] = 0; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[2] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[3] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[4] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_total_tokens = 7; + circ_client_machine.states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1; + + return; +} + +#define BIG_HISTOGRAM_LEN 10 + +/** Setup a machine with a big histogram */ +static void +helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy) +{ + const int tokens_per_bin = 2; + + /* Start, burst */ + circpad_machine_states_init(&circ_client_machine, 2); + + circpad_state_t *burst_state = + &circ_client_machine.states[CIRCPAD_STATE_BURST]; + + circ_client_machine.states[CIRCPAD_STATE_START]. 
+ next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + burst_state->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + burst_state->next_state[CIRCPAD_EVENT_NONPADDING_RECV] =CIRCPAD_STATE_BURST; + + burst_state->next_state[CIRCPAD_EVENT_NONPADDING_SENT] =CIRCPAD_STATE_CANCEL; + + burst_state->token_removal = CIRCPAD_TOKEN_REMOVAL_HIGHER; + + burst_state->histogram_len = BIG_HISTOGRAM_LEN; + burst_state->start_usec = 0; + burst_state->range_usec = 1000; + + int n_tokens = 0; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + burst_state->histogram[i] = tokens_per_bin; + n_tokens += tokens_per_bin; + } + + burst_state->histogram_total_tokens = n_tokens; + burst_state->length_dist.type = CIRCPAD_DIST_UNIFORM; + burst_state->length_dist.param1 = n_tokens; + burst_state->length_dist.param2 = n_tokens; + burst_state->max_length = n_tokens; + burst_state->length_includes_nonpadding = 1; + burst_state->use_rtt_estimate = 0; + burst_state->token_removal = removal_strategy; +} + +static circpad_decision_t +circpad_machine_schedule_padding_mock(circpad_machine_state_t *mi) +{ + (void)mi; + return 0; +} + +static uint64_t +mock_monotime_absolute_usec(void) +{ + return 100; +} + +/** Test higher token removal strategy by bin */ +static void +test_circuitpadding_token_removal_higher(void *arg) +{ + circpad_machine_state_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) 
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_HIGHER); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the order in which the test expects bins to be drained. Every + * removal uses padding_scheduled_at_usec = current_time - 57, and the first + * two tokens must come out of bin 4 (left bound 31, next bound 62). After + * that, tokens must come from the next higher bins: 5, 6, 7 and 8. 
*/ + const int bin_removal_order[] = {4, 5, 6, 7, 8}; + unsigned i; + + /* Remove both tokens from each bin listed in bin_removal_order */ + for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. The guard below is always true here + (i stops at 4, BIG_HISTOGRAM_LEN - 2 is 8), so every bin is checked */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all lower bins are not touched */ + for (i=0; i < 4 ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 1; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[0], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test lower token removal strategy by bin */ +static void +test_circuitpadding_token_removal_lower(void *arg) +{ + circpad_machine_state_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) 
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_LOWER); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the order in which the test expects bins to be drained. Every + * removal uses padding_scheduled_at_usec = current_time - 57, and the first + * two tokens must come out of bin 4 (left bound 31, next bound 62). After + * that, tokens must come from the next lower bins: 3, 2, 1 and 0. 
*/ + const int bin_removal_order[] = {4, 3, 2, 1, 0}; + unsigned i; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all higher bins are untouched */ + for (i = 5; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test closest token removal strategy by bin */ +static void +test_circuitpadding_closest_token_removal(void *arg) +{ + circpad_machine_state_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) 
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec is nearest to the 4th + * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for + * the same reason, then from the 5th, etc. 
*/ + const int bin_removal_order[] = {4, 3, 5, 2, 6, 1, 7, 0, 8, 9}; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all bins have been refilled */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 102; + mi->histogram[0] = 0; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[1], OP_EQ, 1); + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test closest token removal strategy with usec */ +static void +test_circuitpadding_closest_token_removal_usec(void *arg) +{ + 
circpad_machine_state_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) */ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* XXX we want to test remove_token_exact and + circpad_machine_remove_closest_token() with usec */ + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec is nearest to the 4th + * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for + * the same reason, then from the 5th, etc. 
 */ + const int bin_removal_order[] = {4, 3, 5, 2, 1, 0, 6, 7, 8, 9}; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all bins have been refilled */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 102; + mi->histogram[0] = 0; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[1], OP_EQ, 1); + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test the exact token removal strategy: tokens are only taken from the bin that matches the observed delay, and once that bin is empty no other bin is touched. */ +static void +test_circuitpadding_token_removal_exact(void *arg) +{ + circpad_machine_state_t *mi; 
+ (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) */ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_EXACT); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /**********************************************************************/ + uint64_t current_time = monotime_absolute_usec(); + + /* Ensure that we will clear out bin #4 with this usec */ + mi->padding_scheduled_at_usec = current_time - 57; + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_token(mi); + mi->padding_scheduled_at_usec = current_time - 57; + tt_int_op(mi->histogram[4], OP_EQ, 1); + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[4], OP_EQ, 0); + + /* Ensure that we will not remove any other tokens even tho we try to, since + * this is what the exact strategy dictates */ + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + if (i != 4) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + } + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +#undef BIG_HISTOGRAM_LEN + +void +test_circuitpadding_tokens(void *arg) +{ + const circpad_state_t *state; + 
circpad_machine_state_t *mi; + (void)arg; + + /** Test plan: + * + * 1. Test symmetry between bin_to_usec and usec_to_bin + * a. Test conversion + * b. Test edge transitions (lower, upper) + * 2. Test remove higher on an empty bin + * a. Normal bin + * b. Infinity bin + * c. Bin 0 + * d. No higher + * 3. Test remove lower + * a. Normal bin + * b. Bin 0 + * c. No lower + * 4. Test remove closest + * a. Closest lower + * b. Closest higher + * c. Closest 0 + * d. Closest Infinity + */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + + helper_create_basic_machine(); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side, + 0); + + mi = client_side->padding_info[0]; + + // Pretend a non-padding cell was sent + // XXX: This messes us up.. Padding gets scheduled.. 
+ circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + /* We have to save the infinity bin because one inf delay + * could have been chosen when we transition to burst */ + circpad_hist_token_t inf_bin = mi->histogram[4]; + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + state = circpad_machine_current_state(client_side->padding_info[0]); + + // Test 0: convert bin->usec->bin + // Bin 0+1 have different semantics + for (int bin = 0; bin < 2; bin++) { + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + } + for (int bin = 2; bin < state->histogram_len-1; bin++) { + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + /* Verify we round down */ + bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec+3); + tt_int_op(bin, OP_EQ, bin2); + + bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec-1); + tt_int_op(bin, OP_EQ, bin2+1); + } + + // Test 1: converting usec->bin->usec->bin + // Bin 0+1 have different semantics. 
+ for (circpad_delay_t i = 0; i <= state->start_usec+1; i++) { + int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0], + i); + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + tt_int_op(i, OP_LE, usec); + } + for (circpad_delay_t i = state->start_usec+1; + i <= state->start_usec + state->range_usec; i++) { + int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0], + i); + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + tt_int_op(i, OP_GE, usec); + } + + /* 2.a. Normal higher bin */ + { + tt_int_op(mi->histogram[2], OP_EQ, 2); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 2); + tt_int_op(mi->histogram[2], OP_EQ, 1); + + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + } + + /* 2.b. 
Higher Infinity bin */ + { + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + + /* Test past the infinity bin */ + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 5)+1000000); + + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + } + + /* 2.c. Bin 0 */ + { + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_higher_token(mi, + state->start_usec/2); + tt_int_op(mi->histogram[0], OP_EQ, 0); + } + + /* Drain the infinity bin and cause a refill */ + while (inf_bin != 0) { + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + inf_bin--; + } + + circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + + // We should have refilled here. + tt_int_op(mi->histogram[4], OP_EQ, 2); + + /* 3.a. Bin 0. NOTE(review): section 3 of the test plan is "remove lower", yet this step calls circpad_machine_remove_higher_token() exactly like step 2.c -- confirm which function was intended. */ + { + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_higher_token(mi, + state->start_usec/2); + tt_int_op(mi->histogram[0], OP_EQ, 0); + } + + /* 3.b. Test remove lower normal bin */ + { + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + /* 3.c. No lower */ + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + } + + /* 4. Test remove closest + * a. Closest lower + * b. Closest higher + * c. Closest 0 + * d. 
Closest Infinity + */ + circpad_machine_setup_tokens(mi); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[2], OP_EQ, 0); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[0], OP_EQ, 0); + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[4], OP_EQ, 2); + + /* 5. Test remove closest usec + * a. Closest 0 + * b. Closest lower (below midpoint) + * c. Closest higher (above midpoint) + * d. 
Closest Infinity + */ + circpad_machine_setup_tokens(mi); + + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + tt_int_op(mi->histogram[0], OP_EQ, 0); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + tt_int_op(mi->histogram[4], OP_EQ, 2); + + // XXX: Need more coverage of the actual usec branches + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +void +test_circuitpadding_wronghop(void *arg) +{ + /** + * Test plan: + * 1. Padding sent from hop 1 and 3 to client + * 2. Send negotiated from hop 1 and 3 to client + * 3. Garbled negotiated cell + * 4. Padding negotiate sent to client + * 5. Send negotiate stop command for unknown machine + * 6. Send negotiated to relay + * 7. 
Garbled padding negotiate cell + */ + (void)arg; + uint32_t read_bw = 0, overhead_bw = 0; + cell_t cell; + signed_error_t ret; + origin_circuit_t *orig_client; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + client_side = (circuit_t *)origin_circuit_new(); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + orig_client = TO_ORIGIN_CIRCUIT(client_side); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(node_get_by_id, + node_get_by_id_mock); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + /* Build three hops */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + /* verify echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + read_bw = orig_client->n_delivered_read_circ_bw; + overhead_bw = orig_client->n_overhead_read_circ_bw; + + /* 1. 
Test padding from first and third hop */ + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_LT, + orig_client->n_overhead_read_circ_bw); + + /* 2. Test padding negotiated not handled from hops 1,3 */ + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath); + tt_int_op(ret, OP_EQ, -1); + + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next); + tt_int_op(ret, OP_EQ, -1); + + /* 3. Garbled negotiated cell */ + memset(&cell, 255, sizeof(cell)); + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + tt_int_op(ret, OP_EQ, -1); + + /* 4. Test that negotiate is dropped at origin */ + read_bw = orig_client->n_delivered_read_circ_bw; + overhead_bw = orig_client->n_overhead_read_circ_bw; + relay_send_command_from_edge(0, relay_side, + RELAY_COMMAND_PADDING_NEGOTIATE, + (void*)cell.payload, + (size_t)3, NULL); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + tt_int_op(n_relay_cells, OP_EQ, 2); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* 5. 
Test that asking to stop the wrong machine does nothing */ + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side), + 255, 2, CIRCPAD_COMMAND_STOP); + tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(client_side->padding_info[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* 6. Sending negotiated command to relay does nothing */ + ret = circpad_handle_padding_negotiated(relay_side, &cell, NULL); + tt_int_op(ret, OP_EQ, -1); + + /* 7. Test garbled padding negotiate cell (the cell is zeroed by the memset that follows) */ + memset(&cell, 0, sizeof(cell)); + ret = circpad_handle_padding_negotiate(relay_side, &cell); + tt_int_op(ret, OP_EQ, -1); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* Test 2: Test no padding */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = (circuit_t *)origin_circuit_new(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 0); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* verify no echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Spoof padding negotiated on circuit with no padding */ + circpad_padding_negotiated(relay_side, + CIRCPAD_MACHINE_CIRC_SETUP, + CIRCPAD_COMMAND_START, + CIRCPAD_RESPONSE_OK); + + /* verify no padding was negotiated */ + 
tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + circpad_padding_negotiated(relay_side, + CIRCPAD_MACHINE_CIRC_SETUP, + CIRCPAD_COMMAND_START, + CIRCPAD_RESPONSE_ERR); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + monotime_disable_test_mocking(); + UNMOCK(node_get_by_id); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + nodes_free(); +} + +void +test_circuitpadding_negotiation(void *arg) +{ + /** + * Test plan: + * 1. Test circuit where padding is supported by middle + * a. Make sure padding negotiation is sent + * b. Test padding negotiation delivery and parsing + * 2. Test circuit where padding is unsupported by middle + * a. Make sure padding negotiation is not sent + * 3. Test failure to negotiate a machine due to desync. 
+ */ + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + client_side = TO_CIRCUIT(origin_circuit_new()); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(node_get_by_id, + node_get_by_id_mock); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + /* Build two hops */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + /* verify echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Test 2: Test no padding */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 0); + + /* verify no padding was negotiated */ + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* verify no echo was sent */ + 
tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Force negotiate padding. */ + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side), + CIRCPAD_MACHINE_CIRC_SETUP, + 2, CIRCPAD_COMMAND_START); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + /* verify no echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* 3. Test failure to negotiate a machine due to desync */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + SMARTLIST_FOREACH(relay_padding_machines, + circpad_machine_spec_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(relay_padding_machines); + relay_padding_machines = smartlist_new(); + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify echo was sent */ + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 2); + + /* verify no padding was negotiated */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + monotime_disable_test_mocking(); + UNMOCK(node_get_by_id); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + 
nodes_free(); +} + +/** + * Simulate extending the circuit <b>client</b> by one hop, with + * <b>mid_relay</b> standing in for the middle relay: fire the non-padding + * cell events for an extend/extended round trip (advancing the mocked clock + * by 10 msec in between), append a new open hop to the client's cpath, and + * signal circpad_machine_event_circ_added_hop(). <b>padding</b> is stored + * in the first digest byte and handed to extend_info_new() to mark whether + * the new hop supports padding. + */ +static void +simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, + int padding) +{ + char whatevs_key[CPATH_KEY_MATERIAL_LEN]; + char digest[DIGEST_LEN]; + tor_addr_t addr; + + /* Zero the scratch buffers before use: they are handed to + * extend_info_new() and circuit_init_cpath_crypto() below, and only + * digest[0] is assigned explicitly, so the callees would otherwise be + * given indeterminate stack bytes. */ + memset(whatevs_key, 0, sizeof(whatevs_key)); + memset(digest, 0, sizeof(digest)); + memset(&addr, 0, sizeof(addr)); + + // Pretend a non-padding cell was sent + circpad_cell_event_nonpadding_sent((circuit_t*)client); + + // Receive extend cell at middle + circpad_cell_event_nonpadding_received((circuit_t*)mid_relay); + + // Advance time a tiny bit so we can calculate an RTT + curr_mocked_time += 10 * TOR_NSEC_PER_MSEC; + monotime_coarse_set_mock_time_nsec(curr_mocked_time); + monotime_set_mock_time_nsec(curr_mocked_time); + + // Extended cell is sent by the middle relay + circpad_cell_event_nonpadding_sent((circuit_t*)mid_relay); + + // Extended cell is received on the client circuit + circpad_cell_event_nonpadding_received((circuit_t*)client); + + // Add a hop to cpath + crypt_path_t *hop = tor_malloc_zero(sizeof(crypt_path_t)); + onion_append_to_cpath(&TO_ORIGIN_CIRCUIT(client)->cpath, hop); + + hop->magic = CRYPT_PATH_MAGIC; + hop->state = CPATH_STATE_OPEN; + + // add an extend info to indicate if this node supports padding or not. + // (set the first byte of the digest for our mocked node_get_by_id) + digest[0] = padding; + + hop->extend_info = extend_info_new( + padding ? "padding" : "non-padding", + digest, NULL, NULL, NULL, + &addr, padding); + + circuit_init_cpath_crypto(hop, whatevs_key, sizeof(whatevs_key), 0, 0); + + hop->package_window = circuit_initial_package_window(); + hop->deliver_window = CIRCWINDOW_START; + + // Signal that the hop was added + circpad_machine_event_circ_added_hop(TO_ORIGIN_CIRCUIT(client)); +} + +static circpad_machine_spec_t * +helper_create_conditional_machine(void) +{ + circpad_machine_spec_t *ret = + tor_malloc_zero(sizeof(circpad_machine_spec_t)); + + /* Start, burst */ + circpad_machine_states_init(ret, 2); + + ret->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST; + + ret->states[CIRCPAD_STATE_BURST]. 
+ next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST; + + ret->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END; + + ret->states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_NONE; + + /* Three bins: all six tokens start in bin 0; bins 1 and 2 are empty. + * (The last assignment used to repeat index 1, leaving bin 2 to whatever + * the state initialization had put there.) */ + ret->states[CIRCPAD_STATE_BURST].histogram_len = 3; + ret->states[CIRCPAD_STATE_BURST].start_usec = 0; + ret->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + ret->states[CIRCPAD_STATE_BURST].histogram[0] = 6; + ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0; + ret->states[CIRCPAD_STATE_BURST].histogram[2] = 0; + ret->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 6; + ret->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 0; + ret->states[CIRCPAD_STATE_BURST].length_includes_nonpadding = 1; + + return ret; +} + +/** Build the global origin- and relay-side machine lists: two origin-side + * machines (numbers 2 and 3) with different activation conditions, plus + * relay-side machines carrying the same two numbers. */ +static void +helper_create_conditional_machines(void) +{ + circpad_machine_spec_t *add = helper_create_conditional_machine(); + origin_padding_machines = smartlist_new(); + relay_padding_machines = smartlist_new(); + + add->machine_num = 2; + add->is_origin_side = 1; + add->should_negotiate_end = 1; + add->target_hopnum = 2; + + /* Let's have this one end after 4 packets */ + add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM; + add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4; + add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4; + add->states[CIRCPAD_STATE_BURST].max_length = 4; + + add->conditions.requires_vanguards = 0; + add->conditions.min_hops = 2; + add->conditions.state_mask = CIRCPAD_CIRC_BUILDING| + CIRCPAD_CIRC_NO_STREAMS|CIRCPAD_CIRC_HAS_RELAY_EARLY; + add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + + smartlist_add(origin_padding_machines, add); + + add = helper_create_conditional_machine(); + add->machine_num = 3; + add->is_origin_side = 1; + add->should_negotiate_end = 1; + add->target_hopnum = 2; + + /* Let's have this one end after 4 packets */ + add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM; + 
add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4; + add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4; + add->states[CIRCPAD_STATE_BURST].max_length = 4; + + add->conditions.requires_vanguards = 1; + add->conditions.min_hops = 3; + add->conditions.state_mask = CIRCPAD_CIRC_OPENED| + CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY; + add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + smartlist_add(origin_padding_machines, add); + /* Relay-side machines carrying the same numbers (2 and 3) as the origin-side ones */ + add = helper_create_conditional_machine(); + add->machine_num = 2; + smartlist_add(relay_padding_machines, add); + + add = helper_create_conditional_machine(); + add->machine_num = 3; + smartlist_add(relay_padding_machines, add); +} + +void +test_circuitpadding_conditions(void *arg) +{ + /** + * Test plan: + * 0. Make a few origin and relay machines with different conditions + * * vanguards, purposes, has_opened circs, no relay early + * * Client side should_negotiate_end + * * Length limits + * 1. Test STATE_END transitions + * 2. Test new machine after end with same conditions + * 3. Test new machine due to changed conditions + * * Esp: built event, no relay early, no streams + * XXX: Diff test: + * 1. Test STATE_END with pending timers + * 2. Test marking a circuit before padding callback fires + * 3. 
Test freeing a circuit before padding callback fires + */ + (void)arg; + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + nodes_init(); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + client_side = (circuit_t *)origin_circuit_new(); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + helper_create_conditional_machines(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(node_get_by_id, + node_get_by_id_mock); + + /* Simulate extend. This should result in the original machine getting + * added, since the circuit is not built */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Verify that machine #2 is added */ + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + /* Deliver a padding cell to the client, to trigger burst state */ + circpad_cell_event_padding_sent(client_side); + + /* This should have trigger length shutdown condition on client.. */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + /* Verify machine is gone from both sides */ + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* Send another event.. 
verify machine gets re-added properly + * (test race with shutdown) */ + simulate_single_hop_extend(client_side, relay_side, 1); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + TO_ORIGIN_CIRCUIT(client_side)->p_streams = 0; + circpad_machine_event_circ_has_no_streams(TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + /* Now make the circuit opened and send built event */ + TO_ORIGIN_CIRCUIT(client_side)->has_opened = 1; + circpad_machine_event_circ_built(TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + TO_ORIGIN_CIRCUIT(client_side)->remaining_relay_early_cells = 0; + circpad_machine_event_circ_has_no_relay_early( + TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + get_options_mutable()->HSLayer2Nodes = (void*)1; + TO_ORIGIN_CIRCUIT(client_side)->p_streams = (void*)1; + circpad_machine_event_circ_has_streams(TO_ORIGIN_CIRCUIT(client_side)); + + /* Verify different machine is added */ + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3); + + /* Hold off on negotiated */ + deliver_negotiated = 0; + + /* Deliver a padding cell to the client, to trigger burst state */ + circpad_cell_event_padding_sent(client_side); + + /* This should have trigger length shutdown condition on client + * but not the response for the padding machine */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL); + + /* Verify machine is gone from the relay (but negotiated not back yet */ + 
tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* Add another hop and verify it's back */ + simulate_single_hop_extend(client_side, relay_side, 1); + + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3); + + tt_ptr_op(client_side->padding_info[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + done: + /* XXX: Free everything */ + return; +} + +void +test_circuitpadding_circuitsetup_machine(void *arg) +{ + /** + * Test case plan: + * + * 1. Simulate a normal circuit setup pattern + * a. Application traffic + * + * FIXME: This should focus more on exercising the machine + * features rather than actual traffic patterns. For example, + * test cancellation and bins empty/refill + */ + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + dummy_channel.cmux = circuitmux_alloc(); + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(node_get_by_id, + node_get_by_id_mock); + + /* Test case #1: Build a 3 hop circuit, then wait and let pad */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + tt_int_op(n_client_cells, OP_EQ, 1); + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, 
+ CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->is_padding_timer_scheduled, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 1); + + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 2); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 3); + tt_int_op(n_relay_cells, OP_EQ, 2); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 3); + tt_int_op(n_relay_cells, OP_EQ, 3); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 4); + tt_int_op(n_relay_cells, OP_EQ, 3); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 4); + tt_int_op(n_relay_cells, OP_EQ, 4); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + 
OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 5); + tt_int_op(n_relay_cells, OP_EQ, 4); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 5); + tt_int_op(n_relay_cells, OP_EQ, 5); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 5); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 6); + + tt_int_op(client_side->padding_info[0]->current_state, + OP_EQ, CIRCPAD_STATE_END); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->current_state, + OP_EQ, CIRCPAD_STATE_GAP); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + + /* Verify we can't schedule padding in END state */ + circpad_decision_t ret = + circpad_machine_schedule_padding(client_side->padding_info[0]); + tt_int_op(ret, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Simulate application traffic */ + circpad_cell_event_nonpadding_sent(client_side); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN); + circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + 
tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 7); + + // Test timer cancellation + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + timers_advance_and_run(5000); + circpad_cell_event_padding_received(client_side); + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(n_client_cells, OP_EQ, 8); + tt_int_op(n_relay_cells, OP_EQ, 8); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + + /* Test timer cancel due to state rules */ + circpad_cell_event_nonpadding_sent(client_side); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + circpad_cell_event_padding_received(client_side); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + + /* Simulate application traffic to cancel timer */ + circpad_cell_event_nonpadding_sent(client_side); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN); + circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* No cells sent, except negotiate end from relay */ + tt_int_op(n_client_cells, OP_EQ, 8); + tt_int_op(n_relay_cells, OP_EQ, 9); + + /* Test mark for close and free */ + simulate_single_hop_extend(client_side, relay_side, 1); + 
simulate_single_hop_extend(client_side, relay_side, 1); + timers_advance_and_run(5000); + circpad_cell_event_padding_received(client_side); + + tt_int_op(n_client_cells, OP_EQ, 10); + tt_int_op(n_relay_cells, OP_EQ, 10); + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + circuit_mark_for_close(client_side, END_CIRC_REASON_FLAG_REMOTE); + free_fake_orcirc(relay_side); + timers_advance_and_run(5000); + + /* No cells sent */ + tt_int_op(n_client_cells, OP_EQ, 10); + tt_int_op(n_relay_cells, OP_EQ, 10); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + timers_shutdown(); + monotime_disable_test_mocking(); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + + return; +} + +/** Helper function: Initializes a padding machine where every state uses the + * uniform probability distribution. 
*/ +static void +helper_circpad_circ_distribution_machine_setup(int min, int max) +{ + circpad_machine_states_init(&circ_client_machine, 7); + + circpad_state_t *zero_st = &circ_client_machine.states[0]; + zero_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 1; + zero_st->iat_dist.type = CIRCPAD_DIST_UNIFORM; + zero_st->iat_dist.param1 = min; + zero_st->iat_dist.param2 = max; + zero_st->start_usec = min; + zero_st->range_usec = max; + + circpad_state_t *first_st = &circ_client_machine.states[1]; + first_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 2; + first_st->iat_dist.type = CIRCPAD_DIST_LOGISTIC; + first_st->iat_dist.param1 = min; + first_st->iat_dist.param2 = max; + first_st->start_usec = min; + first_st->range_usec = max; + + circpad_state_t *second_st = &circ_client_machine.states[2]; + second_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 3; + second_st->iat_dist.type = CIRCPAD_DIST_LOG_LOGISTIC; + second_st->iat_dist.param1 = min; + second_st->iat_dist.param2 = max; + second_st->start_usec = min; + second_st->range_usec = max; + + circpad_state_t *third_st = &circ_client_machine.states[3]; + third_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 4; + third_st->iat_dist.type = CIRCPAD_DIST_GEOMETRIC; + third_st->iat_dist.param1 = min; + third_st->iat_dist.param2 = max; + third_st->start_usec = min; + third_st->range_usec = max; + + circpad_state_t *fourth_st = &circ_client_machine.states[4]; + fourth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 5; + fourth_st->iat_dist.type = CIRCPAD_DIST_WEIBULL; + fourth_st->iat_dist.param1 = min; + fourth_st->iat_dist.param2 = max; + fourth_st->start_usec = min; + fourth_st->range_usec = max; + + circpad_state_t *fifth_st = &circ_client_machine.states[5]; + fifth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 6; + fifth_st->iat_dist.type = CIRCPAD_DIST_PARETO; + fifth_st->iat_dist.param1 = min; + fifth_st->iat_dist.param2 = max; + fifth_st->start_usec = min; + fifth_st->range_usec = max; +} + +/** Simple test that 
the padding delays sampled from a uniform distribution + * actually fall within the uniform distribution range. */ +/* TODO: Upgrade this test so that each state tests a different prob + * distribution */ +static void +test_circuitpadding_sample_distribution(void *arg) +{ + circpad_machine_state_t *mi; + int n_samples; + int n_states; + + (void) arg; + + /* mock this function so that we don't actually schedule any padding */ + MOCK(circpad_machine_schedule_padding, + circpad_machine_schedule_padding_mock); + + /* Initialize a machine with multiple probability distributions that should + * return values between 0 and 10 */ + circpad_machines_init(); + helper_circpad_circ_distribution_machine_setup(0, 10); + + /* Initialize machine and circuits */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + mi = client_side->padding_info[0]; + + /* For every state, sample a bunch of values from the distribution and ensure + * they fall within range. 
*/ + for (n_states = 0 ; n_states < 6; n_states++) { + /* Make sure we are in the right state */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, n_states); + + for (n_samples = 0; n_samples < 100; n_samples++) { + circpad_delay_t delay = circpad_machine_sample_delay(mi); + tt_int_op(delay, OP_GE, 0); + tt_int_op(delay, OP_LE, 10); + } + + /* send a non-padding cell to move to the next machine state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + } + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + UNMOCK(circpad_machine_schedule_padding); +} + +static circpad_decision_t +circpad_machine_spec_transition_mock(circpad_machine_state_t *mi, + circpad_event_t event) +{ + (void) mi; + (void) event; + + return CIRCPAD_STATE_UNCHANGED; +} + +/* Test per-machine padding rate limits */ +static void +test_circuitpadding_machine_rate_limiting(void *arg) +{ + (void) arg; + bool retval; + circpad_machine_state_t *mi; + int i; + + /* Ignore machine transitions for the purposes of this function, we only + * really care about padding counts */ + MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock); + MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock); + + /* Setup machine and circuits */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + helper_create_basic_machine(); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + mi = client_side->padding_info[0]; + /* Set up the machine info so that we can get through the basic functions */ + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + + /* First we are going to test the per-machine rate limits */ + circ_client_machine.max_padding_percent = 50; + circ_client_machine.allowed_padding_count = 100; + + /* Check padding limit, should be fine since we haven't sent anything yet. 
*/ + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so + * the rate limit will not trigger */ + for (i=0;i<99;i++) { + circpad_send_padding_cell_for_callback(mi); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Now send another padding cell to pass circpad_global_allowed_cells=100, + and see that the limit will trigger */ + circpad_send_padding_cell_for_callback(mi); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + retval = circpad_machine_schedule_padding(mi); + tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Cover wrap */ + for (;i<UINT16_MAX;i++) { + circpad_send_padding_cell_for_callback(mi); + } + tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/2+1); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, mi); + for (i=0;i<UINT16_MAX;i++) { + circpad_cell_event_nonpadding_sent(client_side); + } + + tt_int_op(mi->nonpadding_sent, OP_EQ, UINT16_MAX/2); + tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/4+1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); +} + +/* Test global padding rate limits */ +static void +test_circuitpadding_global_rate_limiting(void *arg) +{ + (void) arg; + bool retval; + circpad_machine_state_t *mi; + int i; + + /* Ignore machine transitions for the purposes of this function, we only + * really care about padding counts */ + MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock); + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + 
timers_initialize(); + + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + dummy_channel.cmux = circuitmux_alloc(); + + /* Setup machine and circuits */ + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, &dummy_channel); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + helper_create_basic_machine(); + relay_side->padding_machine[0] = &circ_client_machine; + relay_side->padding_info[0] = + circpad_circuit_machineinfo_new(relay_side, 0); + mi = relay_side->padding_info[0]; + /* Set up the machine info so that we can get through the basic functions */ + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Now test the global limits by setting up the consensus */ + networkstatus_t vote1; + vote1.net_params = smartlist_new(); + smartlist_split_string(vote1.net_params, + "circpad_global_allowed_cells=100 circpad_global_max_padding_pct=50", + NULL, 0, 0); + /* Register global limits with the padding subsystem */ + circpad_new_consensus_params(&vote1); + + /* Check padding limit, should be fine since we haven't sent anything yet. 
*/ + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so + * the rate limit will not trigger */ + for (i=0;i<99;i++) { + circpad_send_padding_cell_for_callback(mi); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Now send another padding cell to pass circpad_global_allowed_cells=100, + and see that the limit will trigger */ + circpad_send_padding_cell_for_callback(mi); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + retval = circpad_machine_schedule_padding(mi); + tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Now send 92 non-padding cells to get near the + * circpad_global_max_padding_pct=50 limit; in particular with 96 non-padding + * cells, the padding traffic is still 51% of total traffic so limit should + * trigger */ + for (i=0;i<92;i++) { + circpad_cell_event_nonpadding_sent(relay_side); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + /* Send another non-padding cell to bring the padding traffic to 50% of total + * traffic and get past the limit */ + circpad_cell_event_nonpadding_sent(relay_side); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + done: + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + SMARTLIST_FOREACH(vote1.net_params, char *, cp, tor_free(cp)); + smartlist_free(vote1.net_params); +} + +#define TEST_CIRCUITPADDING(name, flags) \ + { #name, test_##name, (flags), NULL, NULL } + +struct testcase_t circuitpadding_tests[] = { + //TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, 0), + TEST_CIRCUITPADDING(circuitpadding_tokens, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_negotiation, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_wronghop, TT_FORK), + 
TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_conditions, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_rtt, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_sample_distribution, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_machine_rate_limiting, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_global_rate_limiting, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_lower, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_higher, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_closest_token_removal, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_closest_token_removal_usec, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_exact, TT_FORK), + END_OF_TESTCASES +}; + diff --git a/src/test/test_containers.c b/src/test/test_containers.c index 717eb0892a..ad0edf4aa3 100644 --- a/src/test/test_containers.c +++ b/src/test/test_containers.c @@ -96,6 +96,30 @@ test_container_smartlist_basic(void *arg) tor_free(v555); } +/** Test SMARTLIST_FOREACH_REVERSE_BEGIN loop macro */ +static void +test_container_smartlist_foreach_reverse(void *arg) +{ + smartlist_t *sl = smartlist_new(); + int i; + + (void) arg; + + /* Add integers to smartlist in increasing order */ + for (i=0;i<100;i++) { + smartlist_add(sl, (void*)(uintptr_t)i); + } + + /* Pop them out in reverse and test their value */ + SMARTLIST_FOREACH_REVERSE_BEGIN(sl, void*, k) { + i--; + tt_ptr_op(k, OP_EQ, (void*)(uintptr_t)i); + } SMARTLIST_FOREACH_END(k); + + done: + smartlist_free(sl); +} + /** Run unit tests for smartlist-of-strings functionality. 
*/ static void test_container_smartlist_strings(void *arg) @@ -1281,6 +1305,7 @@ test_container_smartlist_strings_eq(void *arg) struct testcase_t container_tests[] = { CONTAINER_LEGACY(smartlist_basic), CONTAINER_LEGACY(smartlist_strings), + CONTAINER_LEGACY(smartlist_foreach_reverse), CONTAINER_LEGACY(smartlist_overlap), CONTAINER_LEGACY(smartlist_digests), CONTAINER_LEGACY(smartlist_join), diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c new file mode 100644 index 0000000000..ff23f01033 --- /dev/null +++ b/src/test/test_prob_distr.c @@ -0,0 +1,1428 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file test_prob_distr.c + * \brief Test probability distributions. + * \detail + * + * For each probability distribution we do two kinds of tests: + * + * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions + * and the various relationships between them for each distribution. We also + * do deterministic tests on their sampling functions. Test vectors for + * these tests were computed from alternative implementations and were + * eyeballed to make sure they make sense + * (e.g. src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr + * with 200-bit precision and is then tested in test_logit_logistic()). + * + * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from + * the given distributions is distributed properly. The stochastic tests are + * slow and their false positive rate is not well suited for CI, so they are + * currently disabled-by-default and put into 'tests-slow'. 
+ */ + +#define PROB_DISTR_PRIVATE + +#include "orconfig.h" + +#include "test/test.h" + +#include "core/or/or.h" + +#include "lib/math/prob_distr.h" +#include "lib/math/fp.h" +#include "lib/crypt_ops/crypto_rand.h" + +#include <float.h> +#include <math.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +/** + * Return floor(d) converted to size_t, as a workaround for complaints + * under -Wbad-function-cast for (size_t)floor(d). + */ +static size_t +floor_to_size_t(double d) +{ + double integral_d = floor(d); + return (size_t)integral_d; +} + +/** + * Return ceil(d) converted to size_t, as a workaround for complaints + * under -Wbad-function-cast for (size_t)ceil(d). + */ +static size_t +ceil_to_size_t(double d) +{ + double integral_d = ceil(d); + return (size_t)integral_d; +} + +/* + * Geometric(p) distribution, supported on {1, 2, 3, ...}. + * + * Compute the probability mass function Geom(n; p) of the number of + * trials before the first success when success has probability p. + */ +static double +logpmf_geometric(unsigned n, double p) +{ + /* This is actually a check against 1, but we do >= so that the compiler + does not raise a -Wfloat-equal */ + if (p >= 1) { + if (n == 1) + return 0; + else + return -HUGE_VAL; + } + return (n - 1)*log1p(-p) + log(p); +} + +/** + * Compute the logistic function, translated in output by 1/2: + * logistichalf(x) = logistic(x) - 1/2. Well-conditioned on the entire + * real plane, with maximum condition number 1 at 0. + * + * This implementation gives relative error bounded by 5 eps. + */ +static double +logistichalf(double x) +{ + /* + * Rewrite this with the identity + * + * 1/(1 + e^{-x}) - 1/2 + * = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x}) + * = (1/2 - e^{-x}/2)/(1 + e^{-x}) + * = (1 - e^{-x})/[2 (1 + e^{-x})] + * = -(e^{-x} - 1)/[2 (1 + e^{-x})], + * + * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))]. 
+ * + * Suppose exp has error d0, + has error d1, expm1 has error + * d2, and / has error d3, so we evaluate + * + * -(1 + d2) (1 + d3) (e^{-x} - 1) + * / [2 (1 + d1) (1 + (1 + d0) e^{-x})]. + * + * In the denominator, + * + * 1 + (1 + d0) e^{-x} + * = 1 + e^{-x} + d0 e^{-x} + * = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})), + * + * so the relative error of the numerator is + * + * d' = d2 + d3 + d2 d3, + * and of the denominator, + * d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x}) + * = d1 + d0 L(-x) + d0 d1 L(-x), + * + * where L(-x) is logistic(-x). By Lemma 1 the relative error + * of the quotient is bounded by + * + * 2|d2 + d3 + d2 d3 - d1 - d0 L(x) + d0 d1 L(x)|, + * + * Since 0 < L(x) < 1, this is bounded by + * + * 2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1| + * <= 4 eps + 2 eps^2. + */ + if (x < log(DBL_EPSILON/8)) { + /* + * Avoid overflow in e^{-x}. When x < log(eps/4), we + * we further have x < logit(eps/4), so that + * logistic(x) < eps/4. Hence the relative error of + * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and + * so the relative error of -1/2 from logistic(x) - 1/2 + * is bounded by eps. + */ + return -0.5; + } else { + return -expm1(-x)/(2*(1 + exp(-x))); + } +} + +/** + * Compute the log of the sum of the exps. Caller should arrange the + * array in descending order to minimize error because I don't want to + * deal with using temporary space and the one caller in this file + * arranges that anyway. + * + * Warning: This implementation does not handle infinite or NaN inputs + * sensibly, because I don't need that here at the moment. (NaN, or + * -inf and +inf together, should yield NaN; +inf and finite should + * yield +inf; otherwise all -inf should be ignored because exp(-inf) = + * 0.) 
+ */ +static double +logsumexp(double *A, size_t n) +{ + double maximum, sum; + size_t i; + + if (n == 0) + return log(0); + + maximum = A[0]; + for (i = 1; i < n; i++) { + if (A[i] > maximum) + maximum = A[i]; + } + + sum = 0; + for (i = n; i --> 0;) + sum += exp(A[i] - maximum); + + return log(sum) + maximum; +} + +/** + * Compute log(1 - e^x). Defined only for negative x so that e^x < 1. + * This is the complement of a probability in log space. + */ +static double +log1mexp(double x) +{ + + /* + * We want to compute log on [0, 1/2) but log1p on [1/2, +inf), + * so partition x at -log(2) = log(1/2). + */ + if (-log(2) < x) + return log(-expm1(x)); + else + return log1p(-exp(x)); +} + +/* + * Tests of numerical errors in computing logit, logistic, and the + * various cdfs, sfs, icdfs, and isfs. + */ + +#define arraycount(A) (sizeof(A)/sizeof(A[0])) + +/** Return relative error between <b>actual</b> and <b>expected</b>. + * Special cases: If <b>expected</b> is zero or infinite, return 0 if + * <b>actual</b> is equal to <b>expected</b> and 1 if not, since the + * usual notion of relative error is undefined but we only use this + * for testing relerr(e, a) <= bound. If either is NaN, return NaN, + * which has the property that NaN <= bound is false no matter what + * bound is. + * + * Beware: if you test !(relerr(e, a) > bound), then the result + * is true when a is NaN because NaN > bound is false too. See + * CHECK_RELERR for correct use to decide when to report failure. + */ +static double +relerr(double expected, double actual) +{ + /* + * To silence -Wfloat-equal, we have to test for equality using + * inequalities: we have (fabs(expected) <= 0) iff (expected == 0), + * and (actual <= expected && actual >= expected) iff actual == + * expected whether expected is zero or infinite. 
+ */ + if (fabs(expected) <= 0 || tor_isinf(expected)) { + if (actual <= expected && actual >= expected) + return 0; + else + return 1; + } else { + return fabs((expected - actual)/expected); + } +} + +/** Check that relative error of <b>expected</b> and <b>actual</b> is within + * <b>relerr_bound</b>. Caller must arrange to have i and relerr_bound in + * scope. */ +#define CHECK_RELERR(expected, actual) do { \ + double check_expected = (expected); \ + double check_actual = (actual); \ + const char *str_expected = #expected; \ + const char *str_actual = #actual; \ + double check_relerr = relerr(expected, actual); \ + if (!(relerr(check_expected, check_actual) <= relerr_bound)) { \ + log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)" \ + " = %.17e > %.17e\n", \ + __func__, __LINE__, (unsigned) i, \ + str_expected, check_expected, \ + str_actual, check_actual, \ + check_relerr, relerr_bound); \ + ok = false; \ + } \ +} while (0) + +/* Check that a <= b. + * Caller must arrange to have i in scope. */ +#define CHECK_LE(a, b) do { \ + double check_a = (a); \ + double check_b = (b); \ + const char *str_a = #a; \ + const char *str_b = #b; \ + if (!(check_a <= check_b)) { \ + log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n", \ + __func__, __LINE__, (unsigned) i, \ + str_a, check_a, str_b, check_b); \ + ok = false; \ + } \ +} while (0) + +/** + * Test the logit and logistic functions. Confirm that they agree with + * the cdf, sf, icdf, and isf of the standard Logistic distribution. + * Confirm that the sampler for the standard logistic distribution maps + * [0, 1] into the right subinterval for the inverse transform, for + * this implementation. 
+ */ +static void +test_logit_logistic(void *arg) +{ + (void) arg; + + static const struct { + double x; /* x = logit(p) */ + double p; /* p = logistic(x) */ + double phalf; /* p - 1/2 = logistic(x) - 1/2 */ + } cases[] = { + { -HUGE_VAL, 0, -0.5 }, + { -1000, 0, -0.5 }, + { -710, 4.47628622567513e-309, -0.5 }, + { -708, 3.307553003638408e-308, -0.5 }, + { -2, .11920292202211755, -.3807970779778824 }, + { -1.0000001, .2689414017088022, -.23105859829119776 }, + { -1, .2689414213699951, -.23105857863000487 }, + { -0.9999999, .26894144103118883, -.2310585589688111 }, + /* see src/test/prob_distr_mpfr_ref.c for computation */ + { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 }, + { -4.000000000533334e-5, .49999, -.00001 }, + { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 }, + { -4e-9, .499999999, -1e-9 }, + { -4e-16, .5, -1e-16 }, + { -4e-300, .5, -1e-300 }, + { 0, .5, 0 }, + { 4e-300, .5, 1e-300 }, + { 4e-16, .5, 1e-16 }, + { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 }, + { 4e-9, .500000001, 1e-9 }, + { 4.0000000005333336e-5, .50001, .00001 }, + { 8.000042667076272e-3, .502, .002 }, + { 0.9999999, .7310585589688111, .2310585589688111 }, + { 1, .7310585786300049, .23105857863000487 }, + { 1.0000001, .7310585982911977, .23105859829119774 }, + { 2, .8807970779778823, .3807970779778824 }, + { 708, 1, .5 }, + { 710, 1, .5 }, + { 1000, 1, .5 }, + { HUGE_VAL, 1, .5 }, + }; + double relerr_bound = 3e-15; /* >10eps */ + size_t i; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double x = cases[i].x; + double p = cases[i].p; + double phalf = cases[i].phalf; + + /* + * cdf is logistic, icdf is logit, and symmetry for + * sf/isf. 
+ */ + CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1)); + CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1)); + CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1)); + CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1)); + + CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2)); + CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2)); + CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2); + CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2); + + CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5)); + CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5)); + CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2); + CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2); + + CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2)); + CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2)); + + /* + * For p near 0 and p near 1/2, the arithmetic of + * translating by 1 loses precision. + */ + if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) { + CHECK_RELERR(icdf_logistic(p, 0, 1), + (icdf_logistic(p, 1, 2) - 1)/2); + CHECK_RELERR(isf_logistic(p, 0, 1), + (isf_logistic(p, 1, 2) - 1)/2); + } + + CHECK_RELERR(p, logistic(x)); + CHECK_RELERR(phalf, logistichalf(x)); + + /* + * On the interior floating-point numbers, either logit or + * logithalf had better give the correct answer. + * + * For probabilities near 0, we can get much finer resolution with + * logit, and for probabilities near 1/2, we can get much finer + * resolution with logithalf by representing them using p - 1/2. + * + * E.g., we can write -.00001 for phalf, and .49999 for p, but the + * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64 + * arithmetic. 
So test logit(.49999) which should give the same + * answer as logithalf(-1.0000000000010001e-5), namely + * -4.000000000537333e-5, and also test logithalf(-.00001) which + * gives -4.000000000533334e-5 instead -- but don't expect + * logit(.49999) to give -4.000000000533334e-5 even though it looks + * like 1/2 - .00001. + * + * A naive implementation of logit will just use log(p/(1 - p)) and + * give the answer -4.000000000551673e-05 for .49999, which is + * wrong in a lot of digits, which happens because log is + * ill-conditioned near 1 and thus amplifies whatever relative + * error we made in computing p/(1 - p). + */ + if ((0 < p && p < 1) || tor_isinf(x)) { + if (phalf >= p - 0.5 && phalf <= p - 0.5) + CHECK_RELERR(x, logit(p)); + if (p >= 0.5 + phalf && p <= 0.5 + phalf) + CHECK_RELERR(x, logithalf(phalf)); + } + + CHECK_RELERR(-phalf, logistichalf(-x)); + if (fabs(phalf) < 0.5 || tor_isinf(x)) + CHECK_RELERR(-x, logithalf(-phalf)); + if (p < 1 || tor_isinf(x)) { + CHECK_RELERR(1 - p, logistic(-x)); + if (p > .75 || tor_isinf(x)) + CHECK_RELERR(-x, logit(1 - p)); + } else { + CHECK_LE(logistic(-x), 1e-300); + } + } + + for (i = 0; i <= 100; i++) { + double p0 = (double)i/100; + + CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0)); + CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0)); + CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))), + sample_logistic(0, 1, p0)); + CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))), + sample_logistic(1, 1, p0)); + } + + if (!ok) + printf("fail logit/logistic / logistic cdf/sf\n"); + + tt_assert(ok); + + done: + ; +} + +/** + * Test the cdf, sf, icdf, and isf of the LogLogistic distribution. 
+ */ +static void +test_log_logistic(void *arg) +{ + (void) arg; + + static const struct { + /* x is a point in the support of the LogLogistic distribution */ + double x; + /* 'p' is the probability that a random variable X for a given LogLogistic + * probability ditribution will take value less-or-equal to x */ + double p; + /* 'np' is the probability that a random variable X for a given LogLogistic + * probability distribution will take value greater-or-equal to x. */ + double np; + } cases[] = { + { 0, 0, 1 }, + { 1e-300, 1e-300, 1 }, + { 1e-17, 1e-17, 1 }, + { 1e-15, 1e-15, .999999999999999 }, + { .1, .09090909090909091, .90909090909090909 }, + { .25, .2, .8 }, + { .5, .33333333333333333, .66666666666666667 }, + { .75, .42857142857142855, .5714285714285714 }, + { .9999, .49997499874993756, .5000250012500626 }, + { .99999999, .49999999749999996, .5000000025 }, + { .999999999999999, .49999999999999994, .5000000000000002 }, + { 1, .5, .5 }, + }; + double relerr_bound = 3e-15; + size_t i; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double x = cases[i].x; + double p = cases[i].p; + double np = cases[i].np; + + CHECK_RELERR(p, cdf_log_logistic(x, 1, 1)); + CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1)); + CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2)); + if (2*sqrt(DBL_MIN) < x) { + CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5)); + CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5)); + CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5)); + } + + CHECK_RELERR(np, sf_log_logistic(x, 1, 1)); + CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1)); + CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2)); + if (2*sqrt(DBL_MIN) < x) { + CHECK_RELERR(np, 
sf_log_logistic(x*x, 1, .5)); + CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5)); + CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5)); + } + + CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1)); + CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1)); + CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2)); + CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2)); + if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) { + CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5)); + CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5)); + } + + CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1)); + CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1)); + CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1)); + CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2)); + CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2)); + CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2)); + if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) { + CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5)); + CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5)); + CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5)); + } + + CHECK_RELERR(x, icdf_log_logistic(p, 1, 1)); + CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1)); + CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1)); + CHECK_RELERR(x, icdf_log_logistic(p, 1, 1)); + CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2)); + CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2)); + CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2)); + CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5)); + CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5)); + + if (np < .9) { + CHECK_RELERR(x, isf_log_logistic(np, 1, 1)); + CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1)); + CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1)); + CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2)); + CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2)); + CHECK_RELERR(sqrt(x)*2, 
isf_log_logistic(np, 2, 2)); + CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5)); + CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5)); + CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5)); + + CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1)); + CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1)); + CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1)); + CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2)); + CHECK_RELERR(1/(2*sqrt(x)), + icdf_log_logistic(np, .5, 2)); + CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2)); + CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5)); + CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5)); + CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5)); + } + + CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1)); + CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1)); + CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1)); + CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2)); + CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2)); + CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2)); + CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5)); + CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5)); + CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5)); + } + + for (i = 0; i <= 100; i++) { + double p0 = (double)i/100; + + CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0)); + CHECK_RELERR((1 - 0.5*p0)/(0.5*p0), + sample_log_logistic(1, p0)); + } + + if (!ok) + printf("fail log logistic cdf/sf\n"); + + tt_assert(ok); + + done: + ; +} + +/** + * Test the cdf, sf, icdf, isf of the Weibull distribution. + */ +static void +test_weibull(void *arg) +{ + (void) arg; + + static const struct { + /* x is a point in the support of the Weibull distribution */ + double x; + /* 'p' is the probability that a random variable X for a given Weibull + * probability ditribution will take value less-or-equal to x */ + double p; + /* 'np' is the probability that a random variable X for a given Weibull + * probability distribution will take value greater-or-equal to x. 
*/ + double np; + } cases[] = { + { 0, 0, 1 }, + { 1e-300, 1e-300, 1 }, + { 1e-17, 1e-17, 1 }, + { .1, .09516258196404043, .9048374180359595 }, + { .5, .3934693402873666, .6065306597126334 }, + { .6931471805599453, .5, .5 }, + { 1, .6321205588285577, .36787944117144233 }, + { 10, .9999546000702375, 4.5399929762484854e-5 }, + { 36, .9999999999999998, 2.319522830243569e-16 }, + { 37, .9999999999999999, 8.533047625744066e-17 }, + { 38, 1, 3.1391327920480296e-17 }, + { 100, 1, 3.720075976020836e-44 }, + { 708, 1, 3.307553003638408e-308 }, + { 710, 1, 4.47628622567513e-309 }, + { 1000, 1, 0 }, + { HUGE_VAL, 1, 0 }, + }; + double relerr_bound = 3e-15; + size_t i; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double x = cases[i].x; + double p = cases[i].p; + double np = cases[i].np; + + CHECK_RELERR(p, cdf_weibull(x, 1, 1)); + CHECK_RELERR(p, cdf_weibull(x/2, .5, 1)); + CHECK_RELERR(p, cdf_weibull(x*2, 2, 1)); + /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits. */ + if (x <= 0 || + sqrt(DBL_MIN) <= x) { + CHECK_RELERR(p, cdf_weibull(x*x, 1, .5)); + CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5)); + CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5)); + } + CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2)); + CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2)); + CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2)); + CHECK_RELERR(np, sf_weibull(x, 1, 1)); + CHECK_RELERR(np, sf_weibull(x/2, .5, 1)); + CHECK_RELERR(np, sf_weibull(x*2, 2, 1)); + CHECK_RELERR(np, sf_weibull(x*x, 1, .5)); + CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5)); + CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5)); + if (x >= 10) { + /* + * exp amplifies the error of sqrt(x)^2 + * proportionally to exp(x); for large inputs + * this is significant. 
+ */ + double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON)); + relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON; + if (relerr_bound < 3e-15) + /* + * The tests are written only to 16 + * decimal places anyway even if your + * `double' is, say, i387 binary80, for + * whatever reason. + */ + relerr_bound = 3e-15; + CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2)); + CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2)); + CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2)); + } + + if (p <= 0.75) { + /* + * For p near 1, not enough precision near 1 to + * recover x. + */ + CHECK_RELERR(x, icdf_weibull(p, 1, 1)); + CHECK_RELERR(x/2, icdf_weibull(p, .5, 1)); + CHECK_RELERR(x*2, icdf_weibull(p, 2, 1)); + } + if (p >= 0.25 && !tor_isinf(x) && np > 0) { + /* + * For p near 0, not enough precision in np + * near 1 to recover x. For 0, isf gives inf, + * even if p is precise enough for the icdf to + * work. + */ + CHECK_RELERR(x, isf_weibull(np, 1, 1)); + CHECK_RELERR(x/2, isf_weibull(np, .5, 1)); + CHECK_RELERR(x*2, isf_weibull(np, 2, 1)); + } + } + + for (i = 0; i <= 100; i++) { + double p0 = (double)i/100; + + CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2)); + CHECK_RELERR(3*sqrt(-log1p(-p0/2)), + sample_weibull(1, p0, 3, 2)); + } + + if (!ok) + printf("fail Weibull cdf/sf\n"); + + tt_assert(ok); + + done: + ; +} + +/** + * Test the cdf, sf, icdf, and isf of the generalized Pareto + * distribution. 
+ */ +static void +test_genpareto(void *arg) +{ + (void) arg; + + struct { + /* xi is the 'xi' parameter of the generalized Pareto distribution, and the + * rest are the same as in the above tests */ + double xi, x, p, np; + } cases[] = { + { 0, 0, 0, 1 }, + { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 }, + { 1e-300, .1, .09516258196404043, .9048374180359595 }, + { 1e-300, 1, .6321205588285577, .36787944117144233 }, + { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 }, + { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 }, + { 1e-16, 1e-200, 9.999999999999998e-201, 1 }, + { 1e-16, 1e-16, 1e-16, 1 }, + { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 }, + { 1e-16, .1, .09516258196404043, .9048374180359595 }, + { 1e-16, 1, .6321205588285577, .36787944117144233 }, + { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 }, + { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 }, + { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 }, + { 1, 1e-300, 1e-300, 1 }, + { 1, 1e-16, 1e-16, .9999999999999999 }, + { 1, .1, .09090909090909091, .9090909090909091 }, + { 1, 1, .5, .5 }, + { 1, 10, .9090909090909091, .0909090909090909 }, + { 1, 100, .9900990099009901, .0099009900990099 }, + { 1, 1000, .999000999000999, 9.990009990009992e-4 }, + { 10, 1e-300, 1e-300, 1 }, + { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 }, + { 10, .1, .06696700846319258, .9330329915368074 }, + { 10, 1, .21320655780322778, .7867934421967723 }, + { 10, 10, .3696701667040189, .6303298332959811 }, + { 10, 100, .49886285755007337, .5011371424499267 }, + { 10, 1000, .6018968102992647, .3981031897007353 }, + }; + double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 }; + size_t i, j; + double relerr_bound = 3e-15; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double xi = cases[i].xi; + double x = cases[i].x; + double p = cases[i].p; + double np = cases[i].np; + + CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi)); + CHECK_RELERR(p, cdf_genpareto(x*2, 0, 
2, xi)); + CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi)); + CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi)); + CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi)); + CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi)); + + if (p < .5) { + CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi)); + CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi)); + CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi)); + } + if (np < .5) { + CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi)); + CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi)); + CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi)); + } + } + + for (i = 0; i < arraycount(xi_array); i++) { + for (j = 0; j <= 100; j++) { + double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100); + + /* This is actually a check against 0, but we do <= so that the compiler + does not raise a -Wfloat-equal */ + if (fabs(xi_array[i]) <= 0) { + /* + * When xi == 0, the generalized Pareto + * distribution reduces to an + * exponential distribution. + */ + CHECK_RELERR(-log(p0/2), + sample_genpareto(0, p0, 0)); + CHECK_RELERR(-log1p(-p0/2), + sample_genpareto(1, p0, 0)); + } else { + CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i], + sample_genpareto(0, p0, xi_array[i])); + CHECK_RELERR((j == 0 ? DBL_MIN : + expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]), + sample_genpareto(1, p0, xi_array[i])); + } + + CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]), + sample_genpareto(0, p0, xi_array[i])); + CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]), + sample_genpareto(1, p0, xi_array[i])); + } + } + + tt_assert(ok); + + done: + ; +} + +/** + * Test the deterministic sampler for uniform distribution on [a, b]. + * + * This currently only tests whether the outcome lies within [a, b]. + */ +static void +test_uniform_interval(void *arg) +{ + (void) arg; + struct { + /* Sample from a uniform distribution with parameters 'a' and 'b', using + * 't' as the sampling index. 
*/ + double t, a, b; + } cases[] = { + { 0, 0, 0 }, + { 0, 0, 1 }, + { 0, 1.0000000000000007, 3.999999999999995 }, + { 0, 4000, 4000 }, + { 0.42475836677491291, 4000, 4000 }, + { 0, -DBL_MAX, DBL_MAX }, + { 0.25, -DBL_MAX, DBL_MAX }, + { 0.5, -DBL_MAX, DBL_MAX }, + }; + size_t i = 0; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double t = cases[i].t; + double a = cases[i].a; + double b = cases[i].b; + + CHECK_LE(a, sample_uniform_interval(t, a, b)); + CHECK_LE(sample_uniform_interval(t, a, b), b); + + CHECK_LE(a, sample_uniform_interval(1 - t, a, b)); + CHECK_LE(sample_uniform_interval(1 - t, a, b), b); + + CHECK_LE(sample_uniform_interval(t, -b, -a), -a); + CHECK_LE(-b, sample_uniform_interval(t, -b, -a)); + + CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a); + CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a)); + } + + tt_assert(ok); + + done: + ; +} + +/********************** Stochastic tests ****************************/ + +/* + * Psi test, sometimes also called G-test. The psi test statistic, + * suitably scaled, has chi^2 distribution, but the psi test tends to + * have better statistical power in practice to detect deviations than + * the chi^2 test does. (The chi^2 test statistic is the first term of + * the Taylor expansion of the psi test statistic.) The psi test is + * generic, for any CDF; particular distributions might have higher- + * power tests to distinguish them from predictable deviations or bugs. + * + * We choose the psi critical value so that a single psi test has + * probability below alpha = 1% of spuriously failing even if all the + * code is correct. But the false positive rate for a suite of n tests + * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n. For n = 10, + * this is about 10%, and for n = 100 it is well over 50%. 
+ * + * We can drive it down by running each test twice, and accepting it if + * it passes at least once; in that case, it is as if we used Binom(2; + * 2, alpha) = alpha^2 as the false positive rate for each test, and + * for n = 10 tests, it would be 0.1%, and for n = 100 tests, still + * only 1%. + * + * The critical value for a chi^2 distribution with 100 degrees of + * freedom and false positive rate alpha = 1% was taken from: + * + * NIST/SEMATECH e-Handbook of Statistical Methods, Section + * 1.3.6.7.4 `Critical Values of the Chi-Square Distribution', + * <http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>, + * retrieved 2018-10-28. + */ + +static const size_t NSAMPLES = 100000; +/* Number of chances we give to the test to succeed. */ +static const unsigned NTRIALS = 2; +/* Number of times we want the test to pass per NTRIALS. */ +static const unsigned NPASSES_MIN = 1; + +#define PSI_DF 100 /* degrees of freedom */ +static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */ + +/** + * Perform a psi test on an array of sample counts, C, adding up to N + * samples, and an array of log expected probabilities, logP, + * representing the null hypothesis for the distribution of samples + * counted. Return false if the psi test rejects the null hypothesis, + * true if otherwise. + */ +static bool +psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N) +{ + double psi = 0; + double c = 0; /* Kahan compensation */ + double t, u; + size_t i; + + for (i = 0; i < PSI_DF; i++) { + /* + * c*log(c/(n*p)) = (1/n) * f*log(f/p) where f = c/n is + * the frequency, and f*log(f/p) ---> 0 as f ---> 0, so + * this is a reasonable choice. Further, any mass that + * _fails_ to turn up in this bin will inflate another + * bin instead, so we don't really lose anything by + * ignoring empty bins even if they have high + * probability. 
+ */ + if (C[i] == 0) + continue; + t = C[i]*(log((double)C[i]/N) - logP[i]) - c; + u = psi + t; + c = (u - psi) - t; + psi = u; + } + psi *= 2; + + return psi <= PSI_CRITICAL; +} + +static bool +test_stochastic_geometric_impl(double p) +{ + const struct geometric geometric = { + .base = GEOMETRIC(geometric), + .p = p, + }; + double logP[PSI_DF] = {0}; + unsigned ntry = NTRIALS, npass = 0; + unsigned i; + size_t j; + + /* Compute logP[i] = Geom(i + 1; p). */ + for (i = 0; i < PSI_DF - 1; i++) + logP[i] = logpmf_geometric(i + 1, p); + + /* Compute logP[n-1] = log (1 - (P[0] + P[1] + ... + P[n-2])). */ + logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1)); + + while (ntry --> 0) { + size_t C[PSI_DF] = {0}; + + for (j = 0; j < NSAMPLES; j++) { + double n_tmp = dist_sample(&geometric.base); + + /* Must be an integer. (XXX -Wfloat-equal) */ + tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp); + + /* Must be a positive integer. */ + tor_assert(n_tmp >= 1); + + /* Probability of getting a value in the billions is negligible. */ + tor_assert(n_tmp <= (double)UINT_MAX); + + unsigned n = (unsigned) n_tmp; + + if (n > PSI_DF) + n = PSI_DF; + C[n - 1]++; + } + + if (psi_test(C, logP, NSAMPLES)) { + if (++npass >= NPASSES_MIN) + break; + } + } + + if (npass >= NPASSES_MIN) { + /* printf("pass %s sampler\n", "geometric"); */ + return true; + } else { + printf("fail %s sampler\n", "geometric"); + return false; + } +} + +/** + * Divide the support of <b>dist</b> into histogram bins in <b>logP</b>. Start + * at the 1st percentile and ending at the 99th percentile. Pick the bin + * boundaries using linear interpolation so that they are uniformly spaced. + * + * In each bin logP[i] we insert the expected log-probability that a sampled + * value will fall into that bin. We will use this as the null hypothesis of + * the psi test. + * + * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_n = + * +inf, and x_i = i*(hi - lo)/(n - 2). 
+ */ +static void +bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n) +{ +#define CDF(x) dist_cdf(dist, x) +#define SF(x) dist_sf(dist, x) + const double w = (hi - lo)/(n - 2); + double halfway = dist_icdf(dist, 0.5); + double x_0, x_1; + size_t i; + size_t n2 = ceil_to_size_t((halfway - lo)/w); + + tor_assert(lo <= halfway); + tor_assert(halfway <= hi); + tor_assert(n2 <= n); + + x_1 = lo; + logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */ + for (i = 1; i < n2; i++) { + x_0 = x_1; + /* do the linear interpolation */ + x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w); + /* set the expected log-probability */ + logP[i] = log(CDF(x_1) - CDF(x_0)); + } + x_0 = hi; + logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */ + + /* In this loop we are filling out the high part of the array. We are using + * SF because in these cases the CDF is near 1 where precision is lower. So + * instead we are using SF near 0 where the precision is higher. We have + * SF(t) = 1 - CDF(t). */ + for (i = 1; i < n - n2; i++) { + x_1 = x_0; + /* do the linear interpolation */ + x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w); + /* set the expected log-probability */ + logP[n - i - 1] = log(SF(x_0) - SF(x_1)); + } +#undef SF +#undef CDF +} + +/** + * Draw NSAMPLES samples from dist, counting the number of samples x in + * the ith bin C[i] if x_{i-1} <= x < x_i, where x_-1 = -inf, x_n = + * +inf, and x_i = i*(hi - lo)/(n - 2). + */ +static void +bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n) +{ + const double w = (hi - lo)/(n - 2); + size_t i; + + for (i = 0; i < NSAMPLES; i++) { + double x = dist_sample(dist); + size_t bin; + + if (x < lo) + bin = 0; + else if (x < hi) + bin = 1 + floor_to_size_t((x - lo)/w); + else + bin = n - 1; + tor_assert(bin < n); + C[bin]++; + } +} + +/** + * Carry out a Psi test on <b>dist</b>. 
+ * + * Sample NSAMPLES from dist, putting them in bins from -inf to lo to + * hi to +inf, and apply up to two psi tests. True if at least one psi + * test passes; false if not. False positive rate should be bounded by + * 0.01^2 = 0.0001. + */ +static bool +test_psi_dist_sample(const struct dist *dist) +{ + double logP[PSI_DF] = {0}; + unsigned ntry = NTRIALS, npass = 0; + double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2)); + double hi = dist_isf(dist, 1/(double)(PSI_DF + 2)); + + /* Create the null hypothesis in logP */ + bin_cdfs(dist, lo, hi, logP, PSI_DF); + + /* Now run the test */ + while (ntry --> 0) { + size_t C[PSI_DF] = {0}; + bin_samples(dist, lo, hi, C, PSI_DF); + if (psi_test(C, logP, NSAMPLES)) { + if (++npass >= NPASSES_MIN) + break; + } + } + + /* Did we fail or succeed? */ + if (npass >= NPASSES_MIN) { + /* printf("pass %s sampler\n", dist_name(dist));*/ + return true; + } else { + printf("fail %s sampler\n", dist_name(dist)); + return false; + } +} + +/* This is the seed of the deterministic randomness */ +static uint32_t deterministic_rand_counter; + +/** Initialize the seed of the deterministic randomness. */ +static void +init_deterministic_rand(void) +{ + deterministic_rand_counter = crypto_rand_u32(); +} + +/** Produce deterministic randomness for the stochastic tests using the global + * deterministic_rand_counter seed + * + * This function produces deterministic data over multiple calls iff it's + * called in the same call order with the same 'n' parameter (which is the + * case for the psi test). If not, outputs will deviate. 
*/ +static void +crypto_rand_deterministic(char *out, size_t n) +{ + /* Use a XOF to squeeze bytes out of that silly counter */ + crypto_xof_t *xof = crypto_xof_new(); + tor_assert(xof); + crypto_xof_add_bytes(xof, (uint8_t*)&deterministic_rand_counter, + sizeof(deterministic_rand_counter)); + crypto_xof_squeeze_bytes(xof, (uint8_t*)out, n); + crypto_xof_free(xof); + + /* Increase counter for next run */ + deterministic_rand_counter++; +} + +static void +test_stochastic_uniform(void *arg) +{ + (void) arg; + + const struct uniform uniform01 = { + .base = UNIFORM(uniform01), + .a = 0, + .b = 1, + }; + const struct uniform uniform_pos = { + .base = UNIFORM(uniform_pos), + .a = 1.23, + .b = 4.56, + }; + const struct uniform uniform_neg = { + .base = UNIFORM(uniform_neg), + .a = -10, + .b = -1, + }; + const struct uniform uniform_cross = { + .base = UNIFORM(uniform_cross), + .a = -1.23, + .b = 4.56, + }; + const struct uniform uniform_subnormal = { + .base = UNIFORM(uniform_subnormal), + .a = 4e-324, + .b = 4e-310, + }; + const struct uniform uniform_subnormal_cross = { + .base = UNIFORM(uniform_subnormal_cross), + .a = -4e-324, + .b = 4e-310, + }; + bool ok = true; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok &= test_psi_dist_sample(&uniform01.base); + ok &= test_psi_dist_sample(&uniform_pos.base); + ok &= test_psi_dist_sample(&uniform_neg.base); + ok &= test_psi_dist_sample(&uniform_cross.base); + ok &= test_psi_dist_sample(&uniform_subnormal.base); + ok &= test_psi_dist_sample(&uniform_subnormal_cross.base); + + tt_assert(ok); + + done: + ; +} + +static bool +test_stochastic_logistic_impl(double mu, double sigma) +{ + const struct logistic dist = { + .base = LOGISTIC(dist), + .mu = mu, + .sigma = sigma, + }; + + /* XXX Consider some fancier logistic test. 
*/ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_log_logistic_impl(double alpha, double beta) +{ + const struct log_logistic dist = { + .base = LOG_LOGISTIC(dist), + .alpha = alpha, + .beta = beta, + }; + + /* XXX Consider some fancier log logistic test. */ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_weibull_impl(double lambda, double k) +{ + const struct weibull dist = { + .base = WEIBULL(dist), + .lambda = lambda, + .k = k, + }; + +/* + * XXX Consider applying a Tiku-Singh test: + * + * M.L. Tiku and M. Singh, `Testing the two-parameter + * Weibull distribution', Communications in Statistics -- + * Theory and Methods A10(9), 1981, 907--918. + *https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true + */ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_genpareto_impl(double mu, double sigma, double xi) +{ + const struct genpareto dist = { + .base = GENPARETO(dist), + .mu = mu, + .sigma = sigma, + .xi = xi, + }; + + /* XXX Consider some fancier GPD test. 
*/ + return test_psi_dist_sample(&dist.base); +} + +static void +test_stochastic_genpareto(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_genpareto_impl(0, 1, -0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, -1e-30); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 0); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 1e-30); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(-1, 1, -0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(1, 2, 0.25); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_geometric(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_geometric_impl(0.1); + tt_assert(ok); + ok = test_stochastic_geometric_impl(0.5); + tt_assert(ok); + ok = test_stochastic_geometric_impl(0.9); + tt_assert(ok); + ok = test_stochastic_geometric_impl(1); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_logistic(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_logistic_impl(0, 1); + tt_assert(ok); + ok = test_stochastic_logistic_impl(0, 1e-16); + tt_assert(ok); + ok = test_stochastic_logistic_impl(1, 10); + tt_assert(ok); + ok = test_stochastic_logistic_impl(-10, 100); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, 
deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_log_logistic(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_log_logistic_impl(1, 1); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(1, 10); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(M_E, 1e-1); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_weibull(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_weibull_impl(1, 0.5); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 1); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 1.5); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 2); + tt_assert(ok); + ok = test_stochastic_weibull_impl(10, 1); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +struct testcase_t prob_distr_tests[] = { + { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL }, + { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL }, + { "weibull", test_weibull, TT_FORK, NULL, NULL }, + { "genpareto", test_genpareto, TT_FORK, NULL, NULL }, + { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL }, + END_OF_TESTCASES +}; + +struct testcase_t slow_stochastic_prob_distr_tests[] = { + { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL }, + { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL }, + { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL }, + { 
"stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL }, + { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL, + NULL }, + { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL }, + END_OF_TESTCASES +}; diff --git a/src/test/test_slow.c b/src/test/test_slow.c index 97c2912af6..39a203c726 100644 --- a/src/test/test_slow.c +++ b/src/test/test_slow.c @@ -21,6 +21,7 @@ struct testgroup_t testgroups[] = { { "slow/crypto/", slow_crypto_tests }, { "slow/process/", slow_process_tests }, + { "slow/prob_distr/", slow_stochastic_prob_distr_tests }, END_OF_GROUPS }; diff --git a/src/test/test_util.c b/src/test/test_util.c index 5ffe2d6b1a..2fd9d24498 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -19,6 +19,7 @@ #include "feature/client/transports.h" #include "lib/crypt_ops/crypto_format.h" #include "lib/crypt_ops/crypto_rand.h" +#include "lib/defs/time.h" #include "test/test.h" #include "lib/memarea/memarea.h" #include "lib/process/waitpid.h" @@ -69,6 +70,28 @@ #define INFINITY_DBL ((double)INFINITY) #define NAN_DBL ((double)NAN) +/** Test the tor_isinf() wrapper */ +static void +test_tor_isinf(void *arg) +{ + (void) arg; + + tt_assert(tor_isinf(INFINITY_DBL)); + + tt_assert(!tor_isinf(NAN_DBL)); + tt_assert(!tor_isinf(DBL_EPSILON)); + tt_assert(!tor_isinf(DBL_MAX)); + tt_assert(!tor_isinf(DBL_MIN)); + + tt_assert(!tor_isinf(0.0)); + tt_assert(!tor_isinf(0.1)); + tt_assert(!tor_isinf(3)); + tt_assert(!tor_isinf(3.14)); + + done: + ; +} + /* XXXX this is a minimal wrapper to make the unit tests compile with the * changed tor_timegm interface. 
*/ static time_t @@ -404,7 +427,6 @@ test_util_time(void *arg) /* Assume tv_usec is an unsigned integer until proven otherwise */ #define TV_USEC_MAX UINT_MAX -#define TOR_USEC_PER_SEC 1000000 /* Overflows in the result type */ @@ -6182,6 +6204,7 @@ struct testcase_t util_tests[] = { UTIL_TEST(mathlog, 0), UTIL_TEST(fraction, 0), UTIL_TEST(weak_random, 0), + { "tor_isinf", test_tor_isinf, TT_FORK, NULL, NULL }, { "socket_ipv4", test_util_socket, TT_FORK, &passthrough_setup, (void*)"4" }, { "socket_ipv6", test_util_socket, TT_FORK, diff --git a/src/trunnel/circpad_negotiation.c b/src/trunnel/circpad_negotiation.c new file mode 100644 index 0000000000..236be06ada --- /dev/null +++ b/src/trunnel/circpad_negotiation.c @@ -0,0 +1,549 @@ +/* circpad_negotiation.c -- generated by Trunnel v1.5.2. + * https://gitweb.torproject.org/trunnel.git + * You probably shouldn't edit this file. + */ +#include <stdlib.h> +#include "trunnel-impl.h" + +#include "circpad_negotiation.h" + +#define TRUNNEL_SET_ERROR_CODE(obj) \ + do { \ + (obj)->trunnel_error_code_ = 1; \ + } while (0) + +#if defined(__COVERITY__) || defined(__clang_analyzer__) +/* If we're running a static analysis tool, we don't want it to complain + * that some of our remaining-bytes checks are dead-code. */ +int circpadnegotiation_deadcode_dummy__ = 0; +#define OR_DEADCODE_DUMMY || circpadnegotiation_deadcode_dummy__ +#else +#define OR_DEADCODE_DUMMY +#endif + +#define CHECK_REMAINING(nbytes, label) \ + do { \ + if (remaining < (nbytes) OR_DEADCODE_DUMMY) { \ + goto label; \ + } \ + } while (0) + +circpad_negotiate_t * +circpad_negotiate_new(void) +{ + circpad_negotiate_t *val = trunnel_calloc(1, sizeof(circpad_negotiate_t)); + if (NULL == val) + return NULL; + val->command = CIRCPAD_COMMAND_START; + return val; +} + +/** Release all storage held inside 'obj', but do not free 'obj'. 
+ */ +static void +circpad_negotiate_clear(circpad_negotiate_t *obj) +{ + (void) obj; +} + +void +circpad_negotiate_free(circpad_negotiate_t *obj) +{ + if (obj == NULL) + return; + circpad_negotiate_clear(obj); + trunnel_memwipe(obj, sizeof(circpad_negotiate_t)); + trunnel_free_(obj); +} + +uint8_t +circpad_negotiate_get_version(const circpad_negotiate_t *inp) +{ + return inp->version; +} +int +circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == 0))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->version = val; + return 0; +} +uint8_t +circpad_negotiate_get_command(const circpad_negotiate_t *inp) +{ + return inp->command; +} +int +circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->command = val; + return 0; +} +uint8_t +circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp) +{ + return inp->machine_type; +} +int +circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val) +{ + inp->machine_type = val; + return 0; +} +uint8_t +circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp) +{ + return inp->echo_request; +} +int +circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == 0 || val == 1))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->echo_request = val; + return 0; +} +const char * +circpad_negotiate_check(const circpad_negotiate_t *obj) +{ + if (obj == NULL) + return "Object was NULL"; + if (obj->trunnel_error_code_) + return "A set function failed on this object"; + if (! (obj->version == 0)) + return "Integer out of bounds"; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + return "Integer out of bounds"; + if (! 
(obj->echo_request == 0 || obj->echo_request == 1)) + return "Integer out of bounds"; + return NULL; +} + +ssize_t +circpad_negotiate_encoded_len(const circpad_negotiate_t *obj) +{ + ssize_t result = 0; + + if (NULL != circpad_negotiate_check(obj)) + return -1; + + + /* Length of u8 version IN [0] */ + result += 1; + + /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + result += 1; + + /* Length of u8 machine_type */ + result += 1; + + /* Length of u8 echo_request IN [0, 1] */ + result += 1; + return result; +} +int +circpad_negotiate_clear_errors(circpad_negotiate_t *obj) +{ + int r = obj->trunnel_error_code_; + obj->trunnel_error_code_ = 0; + return r; +} +ssize_t +circpad_negotiate_encode(uint8_t *output, const size_t avail, const circpad_negotiate_t *obj) +{ + ssize_t result = 0; + size_t written = 0; + uint8_t *ptr = output; + const char *msg; +#ifdef TRUNNEL_CHECK_ENCODED_LEN + const ssize_t encoded_len = circpad_negotiate_encoded_len(obj); +#endif + + if (NULL != (msg = circpad_negotiate_check(obj))) + goto check_failed; + +#ifdef TRUNNEL_CHECK_ENCODED_LEN + trunnel_assert(encoded_len >= 0); +#endif + + /* Encode u8 version IN [0] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->version)); + written += 1; ptr += 1; + + /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->command)); + written += 1; ptr += 1; + + /* Encode u8 machine_type */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->machine_type)); + written += 1; ptr += 1; + + /* Encode u8 echo_request IN [0, 1] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->echo_request)); + written += 1; ptr += 1; + + + trunnel_assert(ptr == output + written); +#ifdef 
TRUNNEL_CHECK_ENCODED_LEN + { + trunnel_assert(encoded_len >= 0); + trunnel_assert((size_t)encoded_len == written); + } + +#endif + + return written; + + truncated: + result = -2; + goto fail; + check_failed: + (void)msg; + result = -1; + goto fail; + fail: + trunnel_assert(result < 0); + return result; +} + +/** As circpad_negotiate_parse(), but do not allocate the output + * object. + */ +static ssize_t +circpad_negotiate_parse_into(circpad_negotiate_t *obj, const uint8_t *input, const size_t len_in) +{ + const uint8_t *ptr = input; + size_t remaining = len_in; + ssize_t result = 0; + (void)result; + + /* Parse u8 version IN [0] */ + CHECK_REMAINING(1, truncated); + obj->version = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->version == 0)) + goto fail; + + /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + CHECK_REMAINING(1, truncated); + obj->command = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + goto fail; + + /* Parse u8 machine_type */ + CHECK_REMAINING(1, truncated); + obj->machine_type = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + + /* Parse u8 echo_request IN [0, 1] */ + CHECK_REMAINING(1, truncated); + obj->echo_request = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! 
(obj->echo_request == 0 || obj->echo_request == 1)) + goto fail; + trunnel_assert(ptr + remaining == input + len_in); + return len_in - remaining; + + truncated: + return -2; + fail: + result = -1; + return result; +} + +ssize_t +circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in) +{ + ssize_t result; + *output = circpad_negotiate_new(); + if (NULL == *output) + return -1; + result = circpad_negotiate_parse_into(*output, input, len_in); + if (result < 0) { + circpad_negotiate_free(*output); + *output = NULL; + } + return result; +} +circpad_negotiated_t * +circpad_negotiated_new(void) +{ + circpad_negotiated_t *val = trunnel_calloc(1, sizeof(circpad_negotiated_t)); + if (NULL == val) + return NULL; + val->command = CIRCPAD_COMMAND_START; + val->response = CIRCPAD_RESPONSE_ERR; + return val; +} + +/** Release all storage held inside 'obj', but do not free 'obj'. + */ +static void +circpad_negotiated_clear(circpad_negotiated_t *obj) +{ + (void) obj; +} + +void +circpad_negotiated_free(circpad_negotiated_t *obj) +{ + if (obj == NULL) + return; + circpad_negotiated_clear(obj); + trunnel_memwipe(obj, sizeof(circpad_negotiated_t)); + trunnel_free_(obj); +} + +uint8_t +circpad_negotiated_get_version(const circpad_negotiated_t *inp) +{ + return inp->version; +} +int +circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val) +{ + if (! ((val == 0))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->version = val; + return 0; +} +uint8_t +circpad_negotiated_get_command(const circpad_negotiated_t *inp) +{ + return inp->command; +} +int +circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val) +{ + if (! 
((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->command = val; + return 0; +} +uint8_t +circpad_negotiated_get_response(const circpad_negotiated_t *inp) +{ + return inp->response; +} +int +circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val) +{ + if (! ((val == CIRCPAD_RESPONSE_ERR || val == CIRCPAD_RESPONSE_OK))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->response = val; + return 0; +} +uint8_t +circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp) +{ + return inp->machine_type; +} +int +circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val) +{ + inp->machine_type = val; + return 0; +} +const char * +circpad_negotiated_check(const circpad_negotiated_t *obj) +{ + if (obj == NULL) + return "Object was NULL"; + if (obj->trunnel_error_code_) + return "A set function failed on this object"; + if (! (obj->version == 0)) + return "Integer out of bounds"; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + return "Integer out of bounds"; + if (! 
(obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK)) + return "Integer out of bounds"; + return NULL; +} + +ssize_t +circpad_negotiated_encoded_len(const circpad_negotiated_t *obj) +{ + ssize_t result = 0; + + if (NULL != circpad_negotiated_check(obj)) + return -1; + + + /* Length of u8 version IN [0] */ + result += 1; + + /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + result += 1; + + /* Length of u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + result += 1; + + /* Length of u8 machine_type */ + result += 1; + return result; +} +int +circpad_negotiated_clear_errors(circpad_negotiated_t *obj) +{ + int r = obj->trunnel_error_code_; + obj->trunnel_error_code_ = 0; + return r; +} +ssize_t +circpad_negotiated_encode(uint8_t *output, const size_t avail, const circpad_negotiated_t *obj) +{ + ssize_t result = 0; + size_t written = 0; + uint8_t *ptr = output; + const char *msg; +#ifdef TRUNNEL_CHECK_ENCODED_LEN + const ssize_t encoded_len = circpad_negotiated_encoded_len(obj); +#endif + + if (NULL != (msg = circpad_negotiated_check(obj))) + goto check_failed; + +#ifdef TRUNNEL_CHECK_ENCODED_LEN + trunnel_assert(encoded_len >= 0); +#endif + + /* Encode u8 version IN [0] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->version)); + written += 1; ptr += 1; + + /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->command)); + written += 1; ptr += 1; + + /* Encode u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->response)); + written += 1; ptr += 1; + + /* Encode u8 machine_type */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, 
(obj->machine_type)); + written += 1; ptr += 1; + + + trunnel_assert(ptr == output + written); +#ifdef TRUNNEL_CHECK_ENCODED_LEN + { + trunnel_assert(encoded_len >= 0); + trunnel_assert((size_t)encoded_len == written); + } + +#endif + + return written; + + truncated: + result = -2; + goto fail; + check_failed: + (void)msg; + result = -1; + goto fail; + fail: + trunnel_assert(result < 0); + return result; +} + +/** As circpad_negotiated_parse(), but do not allocate the output + * object. + */ +static ssize_t +circpad_negotiated_parse_into(circpad_negotiated_t *obj, const uint8_t *input, const size_t len_in) +{ + const uint8_t *ptr = input; + size_t remaining = len_in; + ssize_t result = 0; + (void)result; + + /* Parse u8 version IN [0] */ + CHECK_REMAINING(1, truncated); + obj->version = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->version == 0)) + goto fail; + + /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + CHECK_REMAINING(1, truncated); + obj->command = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + goto fail; + + /* Parse u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + CHECK_REMAINING(1, truncated); + obj->response = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! 
(obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK)) + goto fail; + + /* Parse u8 machine_type */ + CHECK_REMAINING(1, truncated); + obj->machine_type = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + trunnel_assert(ptr + remaining == input + len_in); + return len_in - remaining; + + truncated: + return -2; + fail: + result = -1; + return result; +} + +ssize_t +circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in) +{ + ssize_t result; + *output = circpad_negotiated_new(); + if (NULL == *output) + return -1; + result = circpad_negotiated_parse_into(*output, input, len_in); + if (result < 0) { + circpad_negotiated_free(*output); + *output = NULL; + } + return result; +} diff --git a/src/trunnel/circpad_negotiation.h b/src/trunnel/circpad_negotiation.h new file mode 100644 index 0000000000..d09080dc16 --- /dev/null +++ b/src/trunnel/circpad_negotiation.h @@ -0,0 +1,195 @@ +/* circpad_negotiation.h -- generated by Trunnel v1.5.2. + * https://gitweb.torproject.org/trunnel.git + * You probably shouldn't edit this file. + */ +#ifndef TRUNNEL_CIRCPAD_NEGOTIATION_H +#define TRUNNEL_CIRCPAD_NEGOTIATION_H + +#include <stdint.h> +#include "trunnel.h" + +#define CIRCPAD_COMMAND_STOP 1 +#define CIRCPAD_COMMAND_START 2 +#define CIRCPAD_RESPONSE_OK 1 +#define CIRCPAD_RESPONSE_ERR 2 +#define CIRCPAD_MACHINE_CIRC_SETUP 1 +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATE) +struct circpad_negotiate_st { + uint8_t version; + uint8_t command; + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + uint8_t machine_type; + /** If true, send a relay_drop reply.. 
*/ + uint8_t echo_request; + uint8_t trunnel_error_code_; +}; +#endif +typedef struct circpad_negotiate_st circpad_negotiate_t; +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATED) +struct circpad_negotiated_st { + uint8_t version; + uint8_t command; + uint8_t response; + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + uint8_t machine_type; + uint8_t trunnel_error_code_; +}; +#endif +typedef struct circpad_negotiated_st circpad_negotiated_t; +/** Return a newly allocated circpad_negotiate with all elements set + * to zero. + */ +circpad_negotiate_t *circpad_negotiate_new(void); +/** Release all storage held by the circpad_negotiate in 'victim'. (Do + * nothing if 'victim' is NULL.) + */ +void circpad_negotiate_free(circpad_negotiate_t *victim); +/** Try to parse a circpad_negotiate from the buffer in 'input', using + * up to 'len_in' bytes from the input buffer. On success, return the + * number of bytes consumed and set *output to the newly allocated + * circpad_negotiate_t. On failure, return -2 if the input appears + * truncated, and -1 if the input is otherwise invalid. + */ +ssize_t circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in); +/** Return the number of bytes we expect to need to encode the + * circpad_negotiate in 'obj'. On failure, return a negative value. + * Note that this value may be an overestimate, and can even be an + * underestimate for certain unencodeable objects. + */ +ssize_t circpad_negotiate_encoded_len(const circpad_negotiate_t *obj); +/** Try to encode the circpad_negotiate from 'input' into the buffer + * at 'output', using up to 'avail' bytes of the output buffer. On + * success, return the number of bytes used. 
On failure, return -2 if + * the buffer was not long enough, and -1 if the input was invalid. + */ +ssize_t circpad_negotiate_encode(uint8_t *output, size_t avail, const circpad_negotiate_t *input); +/** Check whether the internal state of the circpad_negotiate in 'obj' + * is consistent. Return NULL if it is, and a short message if it is + * not. + */ +const char *circpad_negotiate_check(const circpad_negotiate_t *obj); +/** Clear any errors that were set on the object 'obj' by its setter + * functions. Return true iff errors were cleared. + */ +int circpad_negotiate_clear_errors(circpad_negotiate_t *obj); +/** Return the value of the version field of the circpad_negotiate_t + * in 'inp' + */ +uint8_t circpad_negotiate_get_version(const circpad_negotiate_t *inp); +/** Set the value of the version field of the circpad_negotiate_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the command field of the circpad_negotiate_t + * in 'inp' + */ +uint8_t circpad_negotiate_get_command(const circpad_negotiate_t *inp); +/** Set the value of the command field of the circpad_negotiate_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the machine_type field of the + * circpad_negotiate_t in 'inp' + */ +uint8_t circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp); +/** Set the value of the machine_type field of the circpad_negotiate_t + * in 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. 
+ */ +int circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the echo_request field of the + * circpad_negotiate_t in 'inp' + */ +uint8_t circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp); +/** Set the value of the echo_request field of the circpad_negotiate_t + * in 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val); +/** Return a newly allocated circpad_negotiated with all elements set + * to zero. + */ +circpad_negotiated_t *circpad_negotiated_new(void); +/** Release all storage held by the circpad_negotiated in 'victim'. + * (Do nothing if 'victim' is NULL.) + */ +void circpad_negotiated_free(circpad_negotiated_t *victim); +/** Try to parse a circpad_negotiated from the buffer in 'input', + * using up to 'len_in' bytes from the input buffer. On success, + * return the number of bytes consumed and set *output to the newly + * allocated circpad_negotiated_t. On failure, return -2 if the input + * appears truncated, and -1 if the input is otherwise invalid. + */ +ssize_t circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in); +/** Return the number of bytes we expect to need to encode the + * circpad_negotiated in 'obj'. On failure, return a negative value. + * Note that this value may be an overestimate, and can even be an + * underestimate for certain unencodeable objects. + */ +ssize_t circpad_negotiated_encoded_len(const circpad_negotiated_t *obj); +/** Try to encode the circpad_negotiated from 'input' into the buffer + * at 'output', using up to 'avail' bytes of the output buffer. On + * success, return the number of bytes used. On failure, return -2 if + * the buffer was not long enough, and -1 if the input was invalid. 
+ */ +ssize_t circpad_negotiated_encode(uint8_t *output, size_t avail, const circpad_negotiated_t *input); +/** Check whether the internal state of the circpad_negotiated in + * 'obj' is consistent. Return NULL if it is, and a short message if + * it is not. + */ +const char *circpad_negotiated_check(const circpad_negotiated_t *obj); +/** Clear any errors that were set on the object 'obj' by its setter + * functions. Return true iff errors were cleared. + */ +int circpad_negotiated_clear_errors(circpad_negotiated_t *obj); +/** Return the value of the version field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_version(const circpad_negotiated_t *inp); +/** Set the value of the version field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the command field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_command(const circpad_negotiated_t *inp); +/** Set the value of the command field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the response field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_response(const circpad_negotiated_t *inp); +/** Set the value of the response field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. 
+ */ +int circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the machine_type field of the + * circpad_negotiated_t in 'inp' + */ +uint8_t circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp); +/** Set the value of the machine_type field of the + * circpad_negotiated_t in 'inp' to 'val'. Return 0 on success; return + * -1 and set the error code on 'inp' on failure. + */ +int circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val); + + +#endif diff --git a/src/trunnel/circpad_negotiation.trunnel b/src/trunnel/circpad_negotiation.trunnel new file mode 100644 index 0000000000..abbc929cc5 --- /dev/null +++ b/src/trunnel/circpad_negotiation.trunnel @@ -0,0 +1,44 @@ +/* These are the padding negotiation commands */ +const CIRCPAD_COMMAND_STOP = 1; +const CIRCPAD_COMMAND_START = 2; + +/* Responses to commands */ +const CIRCPAD_RESPONSE_OK = 1; +const CIRCPAD_RESPONSE_ERR = 2; + +/* Built-in machine types */ + +/* 1) Machine that obscures circuit setup */ +const CIRCPAD_MACHINE_CIRC_SETUP = 1; + +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +struct circpad_negotiate { + u8 version IN [0]; + u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP]; + + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + u8 machine_type; + + /** If true, send a relay_drop reply. */ + // FIXME-MP-AP: Maybe we just say to transition to the first state + // here instead.. Also what about delay before responding? + u8 echo_request IN [0,1]; +}; + +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped).
*/ +struct circpad_negotiated { + u8 version IN [0]; + u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP]; + u8 response IN [CIRCPAD_RESPONSE_OK, CIRCPAD_RESPONSE_ERR]; + + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + u8 machine_type; +}; diff --git a/src/trunnel/include.am b/src/trunnel/include.am index b5db0609a8..4f4f1d3624 100644 --- a/src/trunnel/include.am +++ b/src/trunnel/include.am @@ -11,7 +11,8 @@ TRUNNELINPUTS = \ src/trunnel/link_handshake.trunnel \ src/trunnel/pwbox.trunnel \ src/trunnel/channelpadding_negotiation.trunnel \ - src/trunner/socks5.trunnel + src/trunnel/socks5.trunnel \ + src/trunnel/circpad_negotiation.trunnel TRUNNELSOURCES = \ src/ext/trunnel/trunnel.c \ @@ -23,8 +24,9 @@ TRUNNELSOURCES = \ src/trunnel/hs/cell_introduce1.c \ src/trunnel/hs/cell_rendezvous.c \ src/trunnel/channelpadding_negotiation.c \ - src/trunnel/socks5.c \ - src/trunnel/netinfo.c + src/trunnel/socks5.c \ + src/trunnel/netinfo.c \ + src/trunnel/circpad_negotiation.c TRUNNELHEADERS = \ src/ext/trunnel/trunnel.h \ @@ -39,7 +41,8 @@ TRUNNELHEADERS = \ src/trunnel/hs/cell_rendezvous.h \ src/trunnel/channelpadding_negotiation.h \ src/trunnel/socks5.h \ - src/trunnel/netinfo.h + src/trunnel/netinfo.h \ + src/trunnel/circpad_negotiation.h src_trunnel_libor_trunnel_a_SOURCES = $(TRUNNELSOURCES) src_trunnel_libor_trunnel_a_CPPFLAGS = \ |