From 3ba7581129dcffb16cca2aa8cfc2bd0fa4c8dd06 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 19:23:09 +0000 Subject: Provide a smartlist reverse-order traversal. We need this for padding negotiation so that we can have later machine revisions supercede earlier ones. Co-authored-by: George Kadianakis --- src/lib/smartlist_core/smartlist_foreach.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lib/smartlist_core/smartlist_foreach.h b/src/lib/smartlist_core/smartlist_foreach.h index 54f08ac47d..14f2930c9f 100644 --- a/src/lib/smartlist_core/smartlist_foreach.h +++ b/src/lib/smartlist_core/smartlist_foreach.h @@ -83,6 +83,14 @@ ++var ## _sl_idx) { \ var = (sl)->list[var ## _sl_idx]; +#define SMARTLIST_FOREACH_REVERSE_BEGIN(sl, type, var) \ + STMT_BEGIN \ + int var ## _sl_idx, var ## _sl_len=(sl)->num_used; \ + type var; \ + for (var ## _sl_idx = var ## _sl_len-1; var ## _sl_idx >= 0; \ + --var ## _sl_idx) { \ + var = (sl)->list[var ## _sl_idx]; + #define SMARTLIST_FOREACH_END(var) \ var = NULL; \ (void) var ## _sl_idx; \ -- cgit v1.2.3-54-g00ecf From 2a24e21fb07ade157f1226039d71713c0c5b47b7 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 19:48:16 +0000 Subject: Circuit padding header. This is a good code review start point, to get an overview of the interfaces and types used in circuit padding. Co-authored-by: George Kadianakis --- src/core/or/circuitpadding.h | 685 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 685 insertions(+) create mode 100644 src/core/or/circuitpadding.h diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h new file mode 100644 index 0000000000..24034a4548 --- /dev/null +++ b/src/core/or/circuitpadding.h @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2017, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file circuitpadding.h + * \brief Header file for circuitpadding.c. 
+ **/ +#ifndef TOR_CIRCUITPADDING_H +#define TOR_CIRCUITPADDING_H + +#include "circpad_negotiation.h" +#include "lib/evloop/timers.h" + +typedef struct circuit_t circuit_t; +typedef struct origin_circuit_t origin_circuit_t; +typedef struct cell_t cell_t; + +/** + * Signed error return with the specific property that negative + * values mean error codes of various semantics, 0 means success, + * and positive values are unused. + * + * XXX: Tor uses this concept a lot but just calls it int. Should we move + * this somewhere centralized? Where? + */ +typedef int signed_error_t; + +/** + * These constants specify the types of events that can cause + * transitions between state machine states. + * + * Note that SENT and RECV are relative to this endpoint. For + * relays, SENT means packets destined towards the client and + * RECV means packets destined towards the relay. On the client, + * SENT means packets destined towards the relay, where as RECV + * means packets destined towards the client. + */ +typedef enum { + /* A non-padding cell was received. */ + CIRCPAD_EVENT_NONPADDING_RECV = 0, + /* A non-padding cell was sent. */ + CIRCPAD_EVENT_NONPADDING_SENT = 1, + /* A padding cell (RELAY_COMMAND_DROP) was sent. */ + CIRCPAD_EVENT_PADDING_SENT = 2, + /* A padding cell was received. 
*/ + CIRCPAD_EVENT_PADDING_RECV = 3, + /* We tried to schedule padding but we ended up picking the infinity bin + * which means that padding was delayed infinitely */ + CIRCPAD_EVENT_INFINITY = 4, + /* All histogram bins are empty (we are out of tokens) */ + CIRCPAD_EVENT_BINS_EMPTY = 5, + /* just a counter of the events above */ + CIRCPAD_EVENT_LENGTH_COUNT = 6 +} circpad_event_t; +#define CIRCPAD_NUM_EVENTS ((int)CIRCPAD_EVENT_LENGTH_COUNT+1) + +/** Boolean type that says if we decided to transition states or not */ +typedef enum { + CIRCPAD_STATE_UNCHANGED = 0, + CIRCPAD_STATE_CHANGED = 1 +} circpad_decision_t; + +/** The type for the things in histogram bins (aka tokens) */ +typedef uint32_t circpad_hist_token_t; + +/** The type for histogram indexes (signed: negative values signal errors) */ +typedef int8_t circpad_hist_index_t; + +/** The type for absolute time, from monotime_absolute_usec() */ +typedef uint64_t circpad_time_t; + +/** The type for timer delays, in microseconds */ +typedef uint32_t circpad_delay_t; + +/** + * An infinite padding cell delay means don't schedule any padding -- + * simply wait until a different event triggers a transition. + * + * This means that the maximum delay we can schedule is UINT32_MAX-1 + * microseconds, or about 4295 seconds (roughly 1.2 hours). + * XXX: Is this enough if we want to simulate light, intermittent + * activity on an onion service? + */ +#define CIRCPAD_DELAY_INFINITE (UINT32_MAX) + +/** + * Macro to clarify when we're checking the infinity bin. + * + * Works with either circpad_state_t or circpad_machineinfo_t + */ +#define CIRCPAD_INFINITY_BIN(mi) ((mi)->histogram_len-1) + +/** + * These constants form a bitfield that specifies when a state machine + * should be applied to a circuit. + * + * If any of these elements is set, then the circuit will be tested against + * that specific condition. If an element is unset, then we don't test it. + * (E.g.
If neither NO_STREAMS or STREAMS are set, then we will not care + * whether a circuit has streams attached when we apply a state machine) + * + * The helper function circpad_circuit_state() converts circuit state + * flags into this more compact representation. + */ +typedef enum { + /* Only apply machine if the circuit is still building */ + CIRCPAD_CIRC_BUILDING = 1<<0, + /* Only apply machine if the circuit is open */ + CIRCPAD_CIRC_OPENED = 1<<1, + /* Only apply machine if the circuit has no attached streams */ + CIRCPAD_CIRC_NO_STREAMS = 1<<2, + /* Only apply machine if the circuit has attached streams */ + CIRCPAD_CIRC_STREAMS = 1<<3, + /* Only apply machine if the circuit still allows RELAY_EARLY cells */ + CIRCPAD_CIRC_HAS_RELAY_EARLY = 1<<4, + /* Only apply machine if the circuit has depleted its RELAY_EARLY cells + * allowance. */ + CIRCPAD_CIRC_HAS_NO_RELAY_EARLY = 1<<5 +} circpad_circuit_state_t; + +/** Bitmask that says "apply this machine to all states" */ +#define CIRCPAD_STATE_ALL \ + (CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED| \ + CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_NO_STREAMS| \ + CIRCPAD_CIRC_HAS_RELAY_EARLY|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY) + +/** + * A compact circuit purpose bitfield mask that allows us to compactly + * specify which circuit purposes a machine should apply to. + * + * The helper function circpad_circ_purpose_to_mask() converts circuit + * purposes into bit positions in this bitmask. + */ +typedef uint32_t circpad_purpose_mask_t; + +/** Bitmask that says "apply this machine to all purposes". */ +#define CIRCPAD_PURPOSE_ALL (0xFFFFFFFF) + +/** + * This type specifies all of the conditions that must be met before + * a client decides to initiate padding on a circuit. + * + * A circuit must satisfy every sub-field in this type in order + * to be considered to match the conditions. 
+ */ +typedef struct circpad_machine_conditions_t { + /** Only apply the machine *if* the circuit has at least this many hops */ + unsigned min_hops : 3; + + /** Only apply the machine *if* vanguards are enabled */ + unsigned requires_vanguards : 1; + + /** Only apply the machine *if* the circuit's state matches any of + * the bits set in this bitmask. */ + circpad_circuit_state_t state_mask; + + /** Only apply a machine *if* the circuit's purpose matches one + * of the bits set in this bitmask */ + circpad_purpose_mask_t purpose_mask; + +} circpad_machine_conditions_t; + +/** + * Token removal strategy options. + * + * The WTF-PAD histograms are meant to specify a target distribution to shape + * traffic towards. This is accomplished by removing tokens from the histogram + * when either padding or non-padding cells are sent. + * + * When we see a non-padding cell at a particular time since the last cell, you + * remove a token from the corresponding delay bin. These flags specify + * which bin to choose if that bin is already empty. + */ +typedef enum { + /** Don't remove any tokens */ + CIRCPAD_TOKEN_REMOVAL_NONE = 0, + /** + * Remove from the first non-zero higher bin index when current is zero. + * This is the recommended strategy from the Adaptive Padding paper. */ + CIRCPAD_TOKEN_REMOVAL_HIGHER = 1, + /** Remove from the first non-zero lower bin index when current is empty. */ + CIRCPAD_TOKEN_REMOVAL_LOWER = 2, + /** Remove from the closest non-zero bin index when current is empty. */ + CIRCPAD_TOKEN_REMOVAL_CLOSEST = 3, + /** Remove from the closest bin by time value (since bins are + * exponentially spaced). */ + CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC = 4, + /** Only remove from the exact bin corresponding to this delay. If + * the bin is 0, simply do nothing. Don't pick another bin. */ + CIRCPAD_TOKEN_REMOVAL_EXACT = 5 +} circpad_removal_t; + +/** + * Distribution types supported by circpad_distribution_sample(). 
+ * + * These can be used instead of histograms for the inter-packet + * timing distribution, or to specify a distribution on the number + * of cells that can be sent while in a specific state of the state + * machine. */ +typedef enum { + CIRCPAD_DIST_NONE = 0, + CIRCPAD_DIST_UNIFORM = 1, + CIRCPAD_DIST_LOGISTIC = 2, + CIRCPAD_DIST_LOG_LOGISTIC = 3, + CIRCPAD_DIST_GEOMETRIC = 4, + CIRCPAD_DIST_WEIBULL = 5, + CIRCPAD_DIST_PARETO = 6 +} circpad_distribution_type_t; + +/** + * Distribution information. + * + * This type specifies a specific distribution above, as well as + * up to two parameters for that distribution. The specific + * per-distribution meaning of these parameters is specified + * in circpad_distribution_sample(). + */ +typedef struct circpad_distribution_t { + circpad_distribution_type_t type; + double param1; + double param2; +} circpad_distribution_t; + +/** State number type. Represents current state of state machine. */ +typedef uint16_t circpad_statenum_t; +#define CIRCPAD_STATENUM_MAX (UINT16_MAX) + +/** A histogram is used to sample padding delays given a machine state. This + * constant defines the maximum histogram width (i.e. the max number of bins) + * + * Each histogram bin is twice as large as the previous. Two exceptions: The + * first bin has zero width (which means that minimum delay is applied to the + * next padding cell), and the last bin (infinity bin) has infinite width + * (which means that the next padding cell will be delayed infinitely). */ +#define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1) + +/** + * A state of a padding machine. The information here are immutable and + * represent the initial form of the state; it does not get updated as things + * happen. The mutable information that gets updated in runtime are carried in + * a circpad_machineinfo_t. + * + * This struct describes the histograms and parameters of a single + * state in the adaptive padding machine. 
Instances of this struct + * exist in global circpad machine definitions that come from torrc + * or the consensus. + */ +typedef struct circpad_state_t { + /** If a histogram is used for this state, this specifies the number of bins + * of this histogram. Histograms must have at least 2 bins. + * + * If a delay probability distribution is used for this state, this is set + * to 0. */ + circpad_hist_index_t histogram_len; + /** The histogram itself: an array of per-bin token counts, whose + * bin widths are exponentially spaced, in microseconds */ + circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN]; + /** Total number of tokens in this histogram. This is a constant and is *not* + * decremented every time we spend a token. It's used for initializing and + * refilling the histogram. */ + uint32_t histogram_total_tokens; + + /** Minimum padding delay of this state in microseconds. + * + * If histograms are used, this is the left (and right) bound of the first + * bin (since it has zero width). + * + * If a delay probability distribution is used, this represents the minimum + * delay we can sample from the distribution. + */ + circpad_delay_t start_usec; + + /** If histograms are used, this is the width of the whole histogram in + * microseconds, and it's used to calculate individual bin width. + * + * If a delay probability distribution is used, this is used as the max + * delay we can sample from the distribution. + */ + circpad_delay_t range_usec; + + /** + * Represents a delay probability distribution (aka IAT distribution). It's a + * parametrized way of encoding inter-packet delay information in + * microseconds. It can be used instead of histograms. + * + * If it is used, token_removal below must be set to + * CIRCPAD_TOKEN_REMOVAL_NONE. + * + * start_usec, range_usec, and rtt estimates are still applied to the + * results of sampling from this distribution (range_usec is used as a max).
+ */ + circpad_distribution_t iat_dist; + + /** + * The length dist is a parameterized way of encoding how long this + * state machine runs in terms of sent padding cells or all + * sent cells. Values are sampled from this distribution, clamped + * to max_length, and then start_length is added to that value. + * + * It may be specified instead of or in addition to + * the infinity bins and bins empty conditions. */ + circpad_distribution_t length_dist; + /** A minimum length value, added to the output of length_dist */ + uint16_t start_length; + /** A cap on the length value that can be sampled from the length_dist */ + uint64_t max_length; + + /** Should we decrement length when we see a nonpadding packet? + * XXX: Are there any machines that actually want to set this to 0? There may + * not be. OTOH, it's only a bit.. */ + unsigned length_includes_nonpadding : 1; + + /** + * This is an array that specifies the next state to transition to upon + * receipt of an event matching the indicated array index. + * + * This aborts our scheduled packet and switches to the state + * corresponding to the index of the array. Tokens are filled upon + * this transition. + * + * States are allowed to transition to themselves, which means re-schedule + * a new padding timer. They are also allowed to temporarily "transition" + * to the "IGNORE" and "CANCEL" pseudo-states. See #defines below + * for details on state behavior and meaning. + */ + circpad_statenum_t next_state[CIRCPAD_NUM_EVENTS]; + + /** + * If true, estimate the RTT from this relay to the exit/website and add that + * to start_usec for use as the histogram bin 0 start delay. + * + * Right now this is only supported for relay-side state machines. + */ + unsigned use_rtt_estimate : 1; + + /** This specifies the token removal strategy to use upon padding and + * non-padding activity. */ + circpad_removal_t token_removal; +} circpad_state_t; + +/** + * The start state for this machine.
+ * + * In the original WTF-PAD, this is only used for transition to/from + * the burst state. All other fields are not used. But to simplify the + * code we've made it a first-class state. This has no performance + * consequences, but may make naive serialization of the state machine + * large, if we're not careful about how we represent empty fields. + */ +#define CIRCPAD_STATE_START 0 + +/** + * The burst state for this machine. + * + * In the original Adaptive Padding algorithm and in WTF-PAD + * (https://www.freehaven.net/anonbib/cache/ShWa-Timing06.pdf and + * https://www.cs.kau.se/pulls/hot/thebasketcase-wtfpad/), the burst + * state serves to detect bursts in traffic. This is done by using longer + * delays in its histogram, which represent the expected delays between + * bursts of packets in the target stream. If this delay expires without a + * real packet being sent, the burst state sends a padding packet and then + * immediately transitions to the gap state, which is used to generate + * a synthetic padding packet train. In this implementation, this transition + * needs to be explicitly specified in the burst state's transition events. + * + * Because of this flexibility, other padding mechanisms can transition + * between these two states arbitrarily, to encode other dynamics of + * target traffic. + */ +#define CIRCPAD_STATE_BURST 1 + +/** + * The gap state for this machine. + * + * In the original Adaptive Padding algorithm and in WTF-PAD, the gap + * state serves to simulate an artificial packet train composed of padding + * packets. It does this by specifying much lower inter-packet delays than + * the burst state, and transitioning back to itself after padding is sent + * if these timers expire before real traffic is sent. If real traffic is + * sent, it transitions back to the burst state. + * + * Again, in this implementation, these transitions must be specified + * explicitly, and other transitions are also permitted. 
+ */ +#define CIRCPAD_STATE_GAP 2 + +/** + * End is a pseudo-state that causes the machine to go completely + * idle, and optionally get torn down (depending on the + * value of circpad_machine_t.should_negotiate_end) + * + * End MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_END CIRCPAD_STATENUM_MAX + +/** + * "Ignore" is a pseudo-state that means "do not react to this + * event". + * + * "Ignore" MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_IGNORE (CIRCPAD_STATENUM_MAX-1) + +/** + * "Cancel" is a pseudo-state that means "cancel pending timers, + * but remain in your current state". + * + * Cancel MUST NOT occupy a slot in the machine state array. + */ +#define CIRCPAD_STATE_CANCEL (CIRCPAD_STATENUM_MAX-2) + +/** + * Since we have 3 pseudo-states, the max state array length is + * up to one less than cancel's statenum. + */ +#define CIRCPAD_MAX_MACHINE_STATES (CIRCPAD_STATE_CANCEL-1) + +/** + * Mutable padding machine info. + * + * This structure contains mutable information about a padding + * machine. The mutable information must be kept separate because + * it exists per-circuit, where as the machines themselves are global. + * This separation is done to conserve space in the circuit structure. + * + * This is the per-circuit state that changes regarding the global state + * machine. Some parts of it are optional (ie NULL). + * + * XXX: Play with layout to minimize space on x64 Linux (most common relay). + */ +typedef struct circpad_machineinfo_t { + /** The callback pointer for the padding callbacks. + * + * These timers stick around the machineinfo until the machineinfo's circuit + * is closed, at which point the timer is cancelled. For this reason it's + * safe to assume that the machineinfo exists if this timer gets + * triggered. */ + tor_timer_t *padding_timer; + + /** The circuit for this machine */ + circuit_t *on_circ; + + /** A mutable copy of the histogram for the current state. 
+ * NULL if remove_tokens is false for that state */ + circpad_hist_token_t *histogram; + /** Length of the above histogram. + * XXX: This field *could* be removed at the expense of added + * complexity+overhead for reaching back into the immutable machine + * state every time we need to inspect the histogram. It's only a byte, + * though, so it seemed worth it. + */ + circpad_hist_index_t histogram_len; + /** Remove token from this index upon sending padding */ + circpad_hist_index_t chosen_bin; + + /** Stop padding/transition if this many cells sent */ + uint64_t state_length; +#define CIRCPAD_STATE_LENGTH_INFINITE UINT64_MAX + + /** A scaled count of padding packets sent, used to limit padding overhead. + * When this reaches UINT16_MAX, we cut it and nonpadding_sent in half. */ + uint16_t padding_sent; + /** A scaled count of non-padding packets sent, used to limit padding + * overhead. When this reaches UINT16_MAX, we cut it and padding_sent in + * half. */ + uint16_t nonpadding_sent; + + /** + * EWMA estimate of the RTT of the circuit from this hop + * to the exit end, in microseconds. */ + circpad_delay_t rtt_estimate_usec; + + /** + * The last time we got an event relevant to estimating + * the RTT. Monotonic time in microseconds since system + * start. + */ + circpad_time_t last_received_time_usec; + + /** + * The time at which we scheduled a non-padding packet, + * or selected an infinite delay. + * + * Monotonic time in microseconds since system start. + * This is 0 if we haven't chosen a padding delay. + */ + circpad_time_t padding_scheduled_at_usec; + + /** What state is this machine in? */ + circpad_statenum_t current_state; + + /** + * True if we have scheduled a timer for padding. + * + * This is 1 if a timer is pending. It is 0 if + * no timer is scheduled. (It can be 0 even when + * padding_scheduled_at_usec is non-zero). + */ + unsigned is_padding_timer_scheduled : 1; + + /** + * If this is true, we have seen full duplex behavior.
+ * Stop updating the RTT. + */ + unsigned stop_rtt_update : 1; + +/** Max number of padding machines on each circuit. If changed, + * also ensure the machine_index bitwidth supports the new size. */ +#define CIRCPAD_MAX_MACHINES (2) + /** Which padding machine index was this for. + * (make sure changes to the bitwidth can support the + * CIRCPAD_MAX_MACHINES define). */ + unsigned machine_index : 1; + +} circpad_machineinfo_t; + +/** Helper macro to get an actual state machine from a machineinfo */ +#define CIRCPAD_GET_MACHINE(machineinfo) \ + ((machineinfo)->on_circ->padding_machine[(machineinfo)->machine_index]) + +/** + * This specifies a particular padding machine to use after negotiation. + * + * The constants for machine_num_t are in trunnel. + * We want to be able to define extra numbers in the consensus/torrc, though. + */ +typedef uint8_t circpad_machine_num_t; + +/** Global state machine structure from the consensus */ +typedef struct circpad_machine_t { + /** Global machine number */ + circpad_machine_num_t machine_num; + + /** Which machine index slot should this machine go into in + * the array on the circuit_t */ + unsigned machine_index : 1; + + /** Send a padding negotiate to shut down machine at end state? */ + unsigned should_negotiate_end : 1; + + // These next three fields are origin machine-only... + /** Origin side or relay side */ + unsigned is_origin_side : 1; + + /** Which hop in the circuit should we send padding to/from? + * 1-indexed (ie: hop #1 is guard, #2 middle, #3 exit). */ + unsigned target_hopnum : 3; + + /** This machine only kills fascists if the following conditions are met. */ + circpad_machine_conditions_t conditions; + + /** How many padding cells can be sent before we apply overhead limits? + * XXX: Note that we can only allow up to 64k of padding cells on an + * otherwise quiet circuit. Is this enough? It's 33MB.
*/ + uint16_t allowed_padding_count; + + /** Padding percent cap: Stop padding if we exceed this percent overhead. + * 0 means no limit. Overhead is defined as percent of total traffic, so + * that we can use 0..100 here. This is the same definition as used in + * Prop#265. */ + uint8_t max_padding_percent; + + /** State array: indexed by circpad_statenum_t */ + circpad_state_t *states; + + /** + * Number of states this machine has (ie: length of the states array). + * XXX: This field is not needed other than for safety. */ + circpad_statenum_t num_states; +} circpad_machine_t; + +void circpad_new_consensus_params(const networkstatus_t *ns); + +/** + * The following are event call-in points that are of interest to + * the state machines. They are called during cell processing. */ +void circpad_deliver_unrecognized_cell_events(circuit_t *circ, + cell_direction_t dir); +void circpad_deliver_sent_relay_cell_events(circuit_t *circ, + uint8_t relay_command); +void circpad_deliver_recognized_relay_cell_events(circuit_t *circ, + uint8_t relay_command, + crypt_path_t *layer_hint); + +/** Cell events are delivered by the above delivery functions */ +void circpad_cell_event_nonpadding_sent(circuit_t *on_circ); +void circpad_cell_event_nonpadding_received(circuit_t *on_circ); +void circpad_cell_event_padding_sent(circuit_t *on_circ); +void circpad_cell_event_padding_received(circuit_t *on_circ); + +/** Internal events are events the machines send to themselves */ +circpad_decision_t circpad_internal_event_infinity(circpad_machineinfo_t *mi); +circpad_decision_t circpad_internal_event_bins_empty(circpad_machineinfo_t *); +circpad_decision_t circpad_internal_event_state_length_up( + circpad_machineinfo_t *); + +/** Machine creation events are events that cause us to set up or + * tear down padding state machines. 
*/ +void circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ); +void circpad_machine_event_circ_built(origin_circuit_t *circ); +void circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ); +void circpad_machine_event_circ_has_streams(origin_circuit_t *circ); +void circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ); +void circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ); + +void circpad_machines_init(void); +void circpad_machines_free(void); + +void circpad_machine_states_init(circpad_machine_t *machine, + circpad_statenum_t num_states); + +void circpad_circuit_free_all_machineinfos(circuit_t *circ); + +bool circpad_padding_is_from_expected_hop(circuit_t *circ, + crypt_path_t *from_hop); + +/** Serialization functions for reading/writing torrc and consensus */ +char *circpad_machine_to_string(const circpad_machine_t *machine); +const circpad_machine_t *circpad_string_to_machine(const char *str); + +/* Padding negotiation between client and middle */ +signed_error_t circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell); +signed_error_t circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell, + crypt_path_t *layer_hint); +signed_error_t circpad_negotiate_padding(origin_circuit_t *circ, + circpad_machine_num_t machine, + uint8_t target_hopnum, + uint8_t command); +bool circpad_padding_negotiated(circuit_t *circ, + circpad_machine_num_t machine, + uint8_t command, + uint8_t response); + +MOCK_DECL(circpad_decision_t, +circpad_machine_schedule_padding,(circpad_machineinfo_t *)); + +MOCK_DECL(circpad_decision_t, +circpad_machine_transition, (circpad_machineinfo_t *mi, + circpad_event_t event)); + +circpad_decision_t circpad_send_padding_cell_for_callback( + circpad_machineinfo_t *mi); + +#ifdef CIRCUITPADDING_PRIVATE +STATIC circpad_delay_t +circpad_machine_sample_delay(circpad_machineinfo_t *mi); + +STATIC bool +circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi); + +STATIC
+circpad_decision_t circpad_machine_remove_token(circpad_machineinfo_t *mi); + +STATIC circpad_delay_t +circpad_histogram_bin_to_usec(const circpad_machineinfo_t *mi, + circpad_hist_index_t bin); + +STATIC const circpad_state_t * +circpad_machine_current_state(const circpad_machineinfo_t *mi); + +STATIC circpad_hist_index_t circpad_histogram_usec_to_bin( + const circpad_machineinfo_t *mi, + circpad_delay_t us); + +STATIC circpad_machineinfo_t *circpad_circuit_machineinfo_new( + circuit_t *on_circ, + int machine_index); +STATIC void circpad_machine_remove_higher_token(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_us); +STATIC void circpad_machine_remove_lower_token(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_us); +STATIC void circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_us, + bool use_usec); +STATIC void circpad_machine_setup_tokens(circpad_machineinfo_t *mi); + +#ifdef TOR_UNIT_TESTS +extern smartlist_t *origin_padding_machines; +extern smartlist_t *relay_padding_machines; +#endif + +#endif + +#endif -- cgit v1.2.3-54-g00ecf From 2f7b5a2d4468532251dd7b177c02a9c192690174 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 19:51:22 +0000 Subject: Hook up circuit padding to circuit_t. Co-authored-by: George Kadianakis --- src/core/or/circuit_st.h | 24 ++++++++++++++++++++++++ src/core/or/circuitlist.c | 4 ++++ src/core/or/origin_circuit_st.h | 4 ++++ 3 files changed, 32 insertions(+) diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 2e33b37b01..0d9ad3cdd5 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -12,6 +12,11 @@ #include "core/or/cell_queue_st.h" struct hs_token_t; +typedef struct circpad_machine_t circpad_machine_t; +typedef struct circpad_machineinfo_t circpad_machineinfo_t; + +/** Number of padding state machines on a circuit. 
*/ +#define CIRCPAD_MAX_MACHINES (2) /** "magic" value for an origin_circuit_t */ #define ORIGIN_CIRCUIT_MAGIC 0x35315243u @@ -177,6 +182,25 @@ struct circuit_t { /** Hashtable node: used to look up the circuit by its HS token using the HS circuitmap. */ HT_ENTRY(circuit_t) hs_circuitmap_node; + + /** Adaptive Padding state machines: these are immutable. The state machines + * that come from the consensus are saved to a global structure, to avoid + * per-circuit allocations. This merely points to the global copy. + * + * Each element of this array corresponds to a different padding machine, + * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ + const circpad_machine_t *padding_machine[CIRCPAD_MAX_MACHINES]; + + /** Adaptive Padding machine info for above machines. This is the + * per-circuit mutable information, such as the current state and + * histogram token counts. Some of it is optional (aka NULL). + * If a machine is being shut down, these indexes can be NULL + * without the corresponding padding_machine being NULL, while we + * wait for the other end to respond to our shutdown request. + * + * Each element of this array corresponds to a different padding machine, + * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ + circpad_machineinfo_t *padding_info[CIRCPAD_MAX_MACHINES]; }; #endif diff --git a/src/core/or/circuitlist.c b/src/core/or/circuitlist.c index c4b5f7ee3e..71f8becddc 100644 --- a/src/core/or/circuitlist.c +++ b/src/core/or/circuitlist.c @@ -62,6 +62,7 @@ #include "core/or/circuitlist.h" #include "core/or/circuituse.h" #include "core/or/circuitstats.h" +#include "core/or/circuitpadding.h" #include "core/mainloop/connection.h" #include "app/config/config.h" #include "core/or/connection_edge.h" @@ -1231,6 +1232,9 @@ circuit_free_(circuit_t *circ) CIRCUIT_IS_ORIGIN(circ) ? 
TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0); + /* Free any circuit padding structures */ + circpad_circuit_free_all_machineinfos(circ); + if (should_free) { memwipe(mem, 0xAA, memlen); /* poison memory */ tor_free(mem); diff --git a/src/core/or/origin_circuit_st.h b/src/core/or/origin_circuit_st.h index 26cdf590f1..921076c1b9 100644 --- a/src/core/or/origin_circuit_st.h +++ b/src/core/or/origin_circuit_st.h @@ -161,6 +161,10 @@ struct origin_circuit_t { * connections to this circuit. */ unsigned int unusable_for_new_conns : 1; + /* If this flag is set (due to padding negotiation failure), we should + * not try to negotiate further circuit padding. */ + unsigned padding_negotiation_failed : 1; + /** * Tristate variable to guard against pathbias miscounting * due to circuit purpose transitions changing the decision -- cgit v1.2.3-54-g00ecf From 70e9245f6feecceee96f0ea8d426e1a5a6fc9b8d Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 20:52:43 +0000 Subject: Initialize circuit padding machines and global state. 
Co-authored-by: George Kadianakis --- src/app/main/main.c | 10 ++++++++-- src/feature/nodelist/networkstatus.c | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/app/main/main.c b/src/app/main/main.c index d71e43ec30..ba2dfebd77 100644 --- a/src/app/main/main.c +++ b/src/app/main/main.c @@ -22,6 +22,7 @@ #include "core/mainloop/netstatus.h" #include "core/or/channel.h" #include "core/or/channelpadding.h" +#include "core/or/circuitpadding.h" #include "core/or/channeltls.h" #include "core/or/circuitlist.h" #include "core/or/circuitmux_ewma.h" @@ -645,9 +646,13 @@ tor_init(int argc, char *argv[]) /* The options are now initialised */ const or_options_t *options = get_options(); - /* Initialize channelpadding parameters to defaults until we get - * a consensus */ + /* Initialize channelpadding and circpad parameters to defaults + * until we get a consensus */ channelpadding_new_consensus_params(NULL); + circpad_new_consensus_params(NULL); + + /* Initialize circuit padding to defaults+torrc until we get a consensus */ + circpad_machines_init(); /* Initialize predicted ports list after loading options */ predicted_ports_init(); @@ -766,6 +771,7 @@ tor_free_all(int postfork) dns_free_all(); clear_pending_onions(); circuit_free_all(); + circpad_machines_free(); entry_guards_free_all(); pt_free_all(); channel_tls_free_all(); diff --git a/src/feature/nodelist/networkstatus.c b/src/feature/nodelist/networkstatus.c index 65ea3cc491..3245c8ff16 100644 --- a/src/feature/nodelist/networkstatus.c +++ b/src/feature/nodelist/networkstatus.c @@ -44,6 +44,7 @@ #include "core/mainloop/netstatus.h" #include "core/or/channel.h" #include "core/or/channelpadding.h" +#include "core/or/circuitpadding.h" #include "core/or/circuitmux.h" #include "core/or/circuitmux_ewma.h" #include "core/or/circuitstats.h" @@ -2103,6 +2104,7 @@ networkstatus_set_current_consensus(const char *consensus, circuit_build_times_new_consensus_params( get_circuit_build_times_mutable(), c); 
channelpadding_new_consensus_params(c); + circpad_new_consensus_params(c); } /* Reset the failure count only if this consensus is actually valid. */ -- cgit v1.2.3-54-g00ecf From 659a4f06d46a0e8e4f391eda3b6d86f2ab6e4db9 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 20:55:10 +0000 Subject: Circuit padding ProtoVer plumbing. This helps us to determine if a middle node can pad to us or not. Co-authored-by: George Kadianakis --- src/core/or/or.h | 4 ++++ src/core/or/protover.c | 7 ++++++- src/core/or/protover.h | 1 + src/core/or/versions.c | 2 ++ src/feature/nodelist/nodelist.c | 2 +- src/rust/protover/protover.rs | 8 ++++++-- 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/core/or/or.h b/src/core/or/or.h index ca373d8ed5..f297bc9267 100644 --- a/src/core/or/or.h +++ b/src/core/or/or.h @@ -836,6 +836,10 @@ typedef struct protover_summary_flags_t { * service rendezvous point supporting version 3 as seen in proposal 224. * This requires HSRend=2. */ unsigned int supports_v3_rendezvous_point: 1; + + /** True iff this router has a protocol list that allows clients to + * negotiate circuit-level padding. Requires Padding>=1. 
*/ + unsigned int supports_padding : 1; } protover_summary_flags_t; typedef struct routerinfo_t routerinfo_t; diff --git a/src/core/or/protover.c b/src/core/or/protover.c index e80fbfae81..c0c09c9d17 100644 --- a/src/core/or/protover.c +++ b/src/core/or/protover.c @@ -39,6 +39,9 @@ static int protocol_list_contains(const smartlist_t *protos, static const struct { protocol_type_t protover_type; const char *name; +/* If you add a new protocol here, you probably also want to add + * parsing for it in routerstatus_parse_entry_from_string() so that + * it is set in routerstatus_t */ } PROTOCOL_NAMES[] = { { PRT_LINK, "Link" }, { PRT_LINKAUTH, "LinkAuth" }, @@ -49,6 +52,7 @@ static const struct { { PRT_HSREND, "HSRend" }, { PRT_DESC, "Desc" }, { PRT_MICRODESC, "Microdesc"}, + { PRT_PADDING, "Padding"}, { PRT_CONS, "Cons" } }; @@ -396,7 +400,8 @@ protover_get_supported_protocols(void) "LinkAuth=3 " #endif "Microdesc=1-2 " - "Relay=1-2"; + "Relay=1-2 " + "Padding=1"; } /** The protocols from protover_get_supported_protocols(), as parsed into a diff --git a/src/core/or/protover.h b/src/core/or/protover.h index 7319d2f8c4..ffd4f2c18e 100644 --- a/src/core/or/protover.h +++ b/src/core/or/protover.h @@ -43,6 +43,7 @@ typedef enum protocol_type_t { PRT_DESC, PRT_MICRODESC, PRT_CONS, + PRT_PADDING, } protocol_type_t; bool protover_contains_long_protocol_names(const char *s); diff --git a/src/core/or/versions.c b/src/core/or/versions.c index 7bd1f5899f..736313a9cd 100644 --- a/src/core/or/versions.c +++ b/src/core/or/versions.c @@ -448,6 +448,8 @@ memoize_protover_summary(protover_summary_flags_t *out, out->supports_v3_rendezvous_point = protocol_list_supports_protocol(protocols, PRT_HSREND, PROTOVER_HS_RENDEZVOUS_POINT_V3); + out->supports_padding = + protocol_list_supports_protocol(protocols, PRT_PADDING, 1); protover_summary_flags_t *new_cached = tor_memdup(out, sizeof(*out)); cached = strmap_set(protover_summary_map, protocols, new_cached); diff --git 
a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c index d94e73f48f..15b3f7b600 100644 --- a/src/feature/nodelist/nodelist.c +++ b/src/feature/nodelist/nodelist.c @@ -1106,7 +1106,7 @@ node_ed25519_id_matches(const node_t *node, const ed25519_public_key_t *id) /** Dummy object that should be unreturnable. Used to ensure that * node_get_protover_summary_flags() always returns non-NULL. */ static const protover_summary_flags_t zero_protover_flags = { - 0,0,0,0,0,0,0 + 0,0,0,0,0,0,0,0 }; /** Return the protover_summary_flags for a given node. */ diff --git a/src/rust/protover/protover.rs b/src/rust/protover/protover.rs index 8624afeafa..0b2a78c210 100644 --- a/src/rust/protover/protover.rs +++ b/src/rust/protover/protover.rs @@ -46,6 +46,7 @@ pub enum Protocol { LinkAuth, Microdesc, Relay, + Padding, } impl fmt::Display for Protocol { @@ -73,6 +74,7 @@ impl FromStr for Protocol { "LinkAuth" => Ok(Protocol::LinkAuth), "Microdesc" => Ok(Protocol::Microdesc), "Relay" => Ok(Protocol::Relay), + "Padding" => Ok(Protocol::Padding), _ => Err(ProtoverError::UnknownProtocol), } } @@ -163,7 +165,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr { Link=1-5 \ LinkAuth=3 \ Microdesc=1-2 \ - Relay=1-2" + Relay=1-2 \ + Padding=1" ) } else { cstr!( @@ -176,7 +179,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr { Link=1-5 \ LinkAuth=1,3 \ Microdesc=1-2 \ - Relay=1-2" + Relay=1-2 \ + Padding=1" ) } } -- cgit v1.2.3-54-g00ecf From 4ca1df6b326f32fcc87d7d3fd6294221f6964235 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 19:51:35 +0000 Subject: Add padding negotiation trunnel output. 
Co-authored-by: George Kadianakis --- src/trunnel/circpad_negotiation.c | 549 ++++++++++++++++++++++++++++++++ src/trunnel/circpad_negotiation.h | 195 ++++++++++++ src/trunnel/circpad_negotiation.trunnel | 44 +++ src/trunnel/include.am | 11 +- 4 files changed, 795 insertions(+), 4 deletions(-) create mode 100644 src/trunnel/circpad_negotiation.c create mode 100644 src/trunnel/circpad_negotiation.h create mode 100644 src/trunnel/circpad_negotiation.trunnel diff --git a/src/trunnel/circpad_negotiation.c b/src/trunnel/circpad_negotiation.c new file mode 100644 index 0000000000..236be06ada --- /dev/null +++ b/src/trunnel/circpad_negotiation.c @@ -0,0 +1,549 @@ +/* circpad_negotiation.c -- generated by Trunnel v1.5.2. + * https://gitweb.torproject.org/trunnel.git + * You probably shouldn't edit this file. + */ +#include +#include "trunnel-impl.h" + +#include "circpad_negotiation.h" + +#define TRUNNEL_SET_ERROR_CODE(obj) \ + do { \ + (obj)->trunnel_error_code_ = 1; \ + } while (0) + +#if defined(__COVERITY__) || defined(__clang_analyzer__) +/* If we're running a static analysis tool, we don't want it to complain + * that some of our remaining-bytes checks are dead-code. */ +int circpadnegotiation_deadcode_dummy__ = 0; +#define OR_DEADCODE_DUMMY || circpadnegotiation_deadcode_dummy__ +#else +#define OR_DEADCODE_DUMMY +#endif + +#define CHECK_REMAINING(nbytes, label) \ + do { \ + if (remaining < (nbytes) OR_DEADCODE_DUMMY) { \ + goto label; \ + } \ + } while (0) + +circpad_negotiate_t * +circpad_negotiate_new(void) +{ + circpad_negotiate_t *val = trunnel_calloc(1, sizeof(circpad_negotiate_t)); + if (NULL == val) + return NULL; + val->command = CIRCPAD_COMMAND_START; + return val; +} + +/** Release all storage held inside 'obj', but do not free 'obj'. 
+ */ +static void +circpad_negotiate_clear(circpad_negotiate_t *obj) +{ + (void) obj; +} + +void +circpad_negotiate_free(circpad_negotiate_t *obj) +{ + if (obj == NULL) + return; + circpad_negotiate_clear(obj); + trunnel_memwipe(obj, sizeof(circpad_negotiate_t)); + trunnel_free_(obj); +} + +uint8_t +circpad_negotiate_get_version(const circpad_negotiate_t *inp) +{ + return inp->version; +} +int +circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == 0))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->version = val; + return 0; +} +uint8_t +circpad_negotiate_get_command(const circpad_negotiate_t *inp) +{ + return inp->command; +} +int +circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->command = val; + return 0; +} +uint8_t +circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp) +{ + return inp->machine_type; +} +int +circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val) +{ + inp->machine_type = val; + return 0; +} +uint8_t +circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp) +{ + return inp->echo_request; +} +int +circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val) +{ + if (! ((val == 0 || val == 1))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->echo_request = val; + return 0; +} +const char * +circpad_negotiate_check(const circpad_negotiate_t *obj) +{ + if (obj == NULL) + return "Object was NULL"; + if (obj->trunnel_error_code_) + return "A set function failed on this object"; + if (! (obj->version == 0)) + return "Integer out of bounds"; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + return "Integer out of bounds"; + if (! 
(obj->echo_request == 0 || obj->echo_request == 1)) + return "Integer out of bounds"; + return NULL; +} + +ssize_t +circpad_negotiate_encoded_len(const circpad_negotiate_t *obj) +{ + ssize_t result = 0; + + if (NULL != circpad_negotiate_check(obj)) + return -1; + + + /* Length of u8 version IN [0] */ + result += 1; + + /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + result += 1; + + /* Length of u8 machine_type */ + result += 1; + + /* Length of u8 echo_request IN [0, 1] */ + result += 1; + return result; +} +int +circpad_negotiate_clear_errors(circpad_negotiate_t *obj) +{ + int r = obj->trunnel_error_code_; + obj->trunnel_error_code_ = 0; + return r; +} +ssize_t +circpad_negotiate_encode(uint8_t *output, const size_t avail, const circpad_negotiate_t *obj) +{ + ssize_t result = 0; + size_t written = 0; + uint8_t *ptr = output; + const char *msg; +#ifdef TRUNNEL_CHECK_ENCODED_LEN + const ssize_t encoded_len = circpad_negotiate_encoded_len(obj); +#endif + + if (NULL != (msg = circpad_negotiate_check(obj))) + goto check_failed; + +#ifdef TRUNNEL_CHECK_ENCODED_LEN + trunnel_assert(encoded_len >= 0); +#endif + + /* Encode u8 version IN [0] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->version)); + written += 1; ptr += 1; + + /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->command)); + written += 1; ptr += 1; + + /* Encode u8 machine_type */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->machine_type)); + written += 1; ptr += 1; + + /* Encode u8 echo_request IN [0, 1] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->echo_request)); + written += 1; ptr += 1; + + + trunnel_assert(ptr == output + written); +#ifdef 
TRUNNEL_CHECK_ENCODED_LEN + { + trunnel_assert(encoded_len >= 0); + trunnel_assert((size_t)encoded_len == written); + } + +#endif + + return written; + + truncated: + result = -2; + goto fail; + check_failed: + (void)msg; + result = -1; + goto fail; + fail: + trunnel_assert(result < 0); + return result; +} + +/** As circpad_negotiate_parse(), but do not allocate the output + * object. + */ +static ssize_t +circpad_negotiate_parse_into(circpad_negotiate_t *obj, const uint8_t *input, const size_t len_in) +{ + const uint8_t *ptr = input; + size_t remaining = len_in; + ssize_t result = 0; + (void)result; + + /* Parse u8 version IN [0] */ + CHECK_REMAINING(1, truncated); + obj->version = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->version == 0)) + goto fail; + + /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + CHECK_REMAINING(1, truncated); + obj->command = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + goto fail; + + /* Parse u8 machine_type */ + CHECK_REMAINING(1, truncated); + obj->machine_type = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + + /* Parse u8 echo_request IN [0, 1] */ + CHECK_REMAINING(1, truncated); + obj->echo_request = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! 
(obj->echo_request == 0 || obj->echo_request == 1)) + goto fail; + trunnel_assert(ptr + remaining == input + len_in); + return len_in - remaining; + + truncated: + return -2; + fail: + result = -1; + return result; +} + +ssize_t +circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in) +{ + ssize_t result; + *output = circpad_negotiate_new(); + if (NULL == *output) + return -1; + result = circpad_negotiate_parse_into(*output, input, len_in); + if (result < 0) { + circpad_negotiate_free(*output); + *output = NULL; + } + return result; +} +circpad_negotiated_t * +circpad_negotiated_new(void) +{ + circpad_negotiated_t *val = trunnel_calloc(1, sizeof(circpad_negotiated_t)); + if (NULL == val) + return NULL; + val->command = CIRCPAD_COMMAND_START; + val->response = CIRCPAD_RESPONSE_ERR; + return val; +} + +/** Release all storage held inside 'obj', but do not free 'obj'. + */ +static void +circpad_negotiated_clear(circpad_negotiated_t *obj) +{ + (void) obj; +} + +void +circpad_negotiated_free(circpad_negotiated_t *obj) +{ + if (obj == NULL) + return; + circpad_negotiated_clear(obj); + trunnel_memwipe(obj, sizeof(circpad_negotiated_t)); + trunnel_free_(obj); +} + +uint8_t +circpad_negotiated_get_version(const circpad_negotiated_t *inp) +{ + return inp->version; +} +int +circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val) +{ + if (! ((val == 0))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->version = val; + return 0; +} +uint8_t +circpad_negotiated_get_command(const circpad_negotiated_t *inp) +{ + return inp->command; +} +int +circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val) +{ + if (! 
((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->command = val; + return 0; +} +uint8_t +circpad_negotiated_get_response(const circpad_negotiated_t *inp) +{ + return inp->response; +} +int +circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val) +{ + if (! ((val == CIRCPAD_RESPONSE_ERR || val == CIRCPAD_RESPONSE_OK))) { + TRUNNEL_SET_ERROR_CODE(inp); + return -1; + } + inp->response = val; + return 0; +} +uint8_t +circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp) +{ + return inp->machine_type; +} +int +circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val) +{ + inp->machine_type = val; + return 0; +} +const char * +circpad_negotiated_check(const circpad_negotiated_t *obj) +{ + if (obj == NULL) + return "Object was NULL"; + if (obj->trunnel_error_code_) + return "A set function failed on this object"; + if (! (obj->version == 0)) + return "Integer out of bounds"; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + return "Integer out of bounds"; + if (! 
(obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK)) + return "Integer out of bounds"; + return NULL; +} + +ssize_t +circpad_negotiated_encoded_len(const circpad_negotiated_t *obj) +{ + ssize_t result = 0; + + if (NULL != circpad_negotiated_check(obj)) + return -1; + + + /* Length of u8 version IN [0] */ + result += 1; + + /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + result += 1; + + /* Length of u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + result += 1; + + /* Length of u8 machine_type */ + result += 1; + return result; +} +int +circpad_negotiated_clear_errors(circpad_negotiated_t *obj) +{ + int r = obj->trunnel_error_code_; + obj->trunnel_error_code_ = 0; + return r; +} +ssize_t +circpad_negotiated_encode(uint8_t *output, const size_t avail, const circpad_negotiated_t *obj) +{ + ssize_t result = 0; + size_t written = 0; + uint8_t *ptr = output; + const char *msg; +#ifdef TRUNNEL_CHECK_ENCODED_LEN + const ssize_t encoded_len = circpad_negotiated_encoded_len(obj); +#endif + + if (NULL != (msg = circpad_negotiated_check(obj))) + goto check_failed; + +#ifdef TRUNNEL_CHECK_ENCODED_LEN + trunnel_assert(encoded_len >= 0); +#endif + + /* Encode u8 version IN [0] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->version)); + written += 1; ptr += 1; + + /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->command)); + written += 1; ptr += 1; + + /* Encode u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, (obj->response)); + written += 1; ptr += 1; + + /* Encode u8 machine_type */ + trunnel_assert(written <= avail); + if (avail - written < 1) + goto truncated; + trunnel_set_uint8(ptr, 
(obj->machine_type)); + written += 1; ptr += 1; + + + trunnel_assert(ptr == output + written); +#ifdef TRUNNEL_CHECK_ENCODED_LEN + { + trunnel_assert(encoded_len >= 0); + trunnel_assert((size_t)encoded_len == written); + } + +#endif + + return written; + + truncated: + result = -2; + goto fail; + check_failed: + (void)msg; + result = -1; + goto fail; + fail: + trunnel_assert(result < 0); + return result; +} + +/** As circpad_negotiated_parse(), but do not allocate the output + * object. + */ +static ssize_t +circpad_negotiated_parse_into(circpad_negotiated_t *obj, const uint8_t *input, const size_t len_in) +{ + const uint8_t *ptr = input; + size_t remaining = len_in; + ssize_t result = 0; + (void)result; + + /* Parse u8 version IN [0] */ + CHECK_REMAINING(1, truncated); + obj->version = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->version == 0)) + goto fail; + + /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */ + CHECK_REMAINING(1, truncated); + obj->command = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP)) + goto fail; + + /* Parse u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */ + CHECK_REMAINING(1, truncated); + obj->response = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + if (! 
(obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK)) + goto fail; + + /* Parse u8 machine_type */ + CHECK_REMAINING(1, truncated); + obj->machine_type = (trunnel_get_uint8(ptr)); + remaining -= 1; ptr += 1; + trunnel_assert(ptr + remaining == input + len_in); + return len_in - remaining; + + truncated: + return -2; + fail: + result = -1; + return result; +} + +ssize_t +circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in) +{ + ssize_t result; + *output = circpad_negotiated_new(); + if (NULL == *output) + return -1; + result = circpad_negotiated_parse_into(*output, input, len_in); + if (result < 0) { + circpad_negotiated_free(*output); + *output = NULL; + } + return result; +} diff --git a/src/trunnel/circpad_negotiation.h b/src/trunnel/circpad_negotiation.h new file mode 100644 index 0000000000..d09080dc16 --- /dev/null +++ b/src/trunnel/circpad_negotiation.h @@ -0,0 +1,195 @@ +/* circpad_negotiation.h -- generated by Trunnel v1.5.2. + * https://gitweb.torproject.org/trunnel.git + * You probably shouldn't edit this file. + */ +#ifndef TRUNNEL_CIRCPAD_NEGOTIATION_H +#define TRUNNEL_CIRCPAD_NEGOTIATION_H + +#include +#include "trunnel.h" + +#define CIRCPAD_COMMAND_STOP 1 +#define CIRCPAD_COMMAND_START 2 +#define CIRCPAD_RESPONSE_OK 1 +#define CIRCPAD_RESPONSE_ERR 2 +#define CIRCPAD_MACHINE_CIRC_SETUP 1 +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATE) +struct circpad_negotiate_st { + uint8_t version; + uint8_t command; + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + uint8_t machine_type; + /** If true, send a relay_drop reply.. 
*/ + uint8_t echo_request; + uint8_t trunnel_error_code_; +}; +#endif +typedef struct circpad_negotiate_st circpad_negotiate_t; +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATED) +struct circpad_negotiated_st { + uint8_t version; + uint8_t command; + uint8_t response; + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + uint8_t machine_type; + uint8_t trunnel_error_code_; +}; +#endif +typedef struct circpad_negotiated_st circpad_negotiated_t; +/** Return a newly allocated circpad_negotiate with all elements set + * to zero. + */ +circpad_negotiate_t *circpad_negotiate_new(void); +/** Release all storage held by the circpad_negotiate in 'victim'. (Do + * nothing if 'victim' is NULL.) + */ +void circpad_negotiate_free(circpad_negotiate_t *victim); +/** Try to parse a circpad_negotiate from the buffer in 'input', using + * up to 'len_in' bytes from the input buffer. On success, return the + * number of bytes consumed and set *output to the newly allocated + * circpad_negotiate_t. On failure, return -2 if the input appears + * truncated, and -1 if the input is otherwise invalid. + */ +ssize_t circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in); +/** Return the number of bytes we expect to need to encode the + * circpad_negotiate in 'obj'. On failure, return a negative value. + * Note that this value may be an overestimate, and can even be an + * underestimate for certain unencodeable objects. + */ +ssize_t circpad_negotiate_encoded_len(const circpad_negotiate_t *obj); +/** Try to encode the circpad_negotiate from 'input' into the buffer + * at 'output', using up to 'avail' bytes of the output buffer. On + * success, return the number of bytes used. 
On failure, return -2 if + * the buffer was not long enough, and -1 if the input was invalid. + */ +ssize_t circpad_negotiate_encode(uint8_t *output, size_t avail, const circpad_negotiate_t *input); +/** Check whether the internal state of the circpad_negotiate in 'obj' + * is consistent. Return NULL if it is, and a short message if it is + * not. + */ +const char *circpad_negotiate_check(const circpad_negotiate_t *obj); +/** Clear any errors that were set on the object 'obj' by its setter + * functions. Return true iff errors were cleared. + */ +int circpad_negotiate_clear_errors(circpad_negotiate_t *obj); +/** Return the value of the version field of the circpad_negotiate_t + * in 'inp' + */ +uint8_t circpad_negotiate_get_version(const circpad_negotiate_t *inp); +/** Set the value of the version field of the circpad_negotiate_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the command field of the circpad_negotiate_t + * in 'inp' + */ +uint8_t circpad_negotiate_get_command(const circpad_negotiate_t *inp); +/** Set the value of the command field of the circpad_negotiate_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the machine_type field of the + * circpad_negotiate_t in 'inp' + */ +uint8_t circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp); +/** Set the value of the machine_type field of the circpad_negotiate_t + * in 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. 
+ */ +int circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val); +/** Return the value of the echo_request field of the + * circpad_negotiate_t in 'inp' + */ +uint8_t circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp); +/** Set the value of the echo_request field of the circpad_negotiate_t + * in 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val); +/** Return a newly allocated circpad_negotiated with all elements set + * to zero. + */ +circpad_negotiated_t *circpad_negotiated_new(void); +/** Release all storage held by the circpad_negotiated in 'victim'. + * (Do nothing if 'victim' is NULL.) + */ +void circpad_negotiated_free(circpad_negotiated_t *victim); +/** Try to parse a circpad_negotiated from the buffer in 'input', + * using up to 'len_in' bytes from the input buffer. On success, + * return the number of bytes consumed and set *output to the newly + * allocated circpad_negotiated_t. On failure, return -2 if the input + * appears truncated, and -1 if the input is otherwise invalid. + */ +ssize_t circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in); +/** Return the number of bytes we expect to need to encode the + * circpad_negotiated in 'obj'. On failure, return a negative value. + * Note that this value may be an overestimate, and can even be an + * underestimate for certain unencodeable objects. + */ +ssize_t circpad_negotiated_encoded_len(const circpad_negotiated_t *obj); +/** Try to encode the circpad_negotiated from 'input' into the buffer + * at 'output', using up to 'avail' bytes of the output buffer. On + * success, return the number of bytes used. On failure, return -2 if + * the buffer was not long enough, and -1 if the input was invalid. 
+ */ +ssize_t circpad_negotiated_encode(uint8_t *output, size_t avail, const circpad_negotiated_t *input); +/** Check whether the internal state of the circpad_negotiated in + * 'obj' is consistent. Return NULL if it is, and a short message if + * it is not. + */ +const char *circpad_negotiated_check(const circpad_negotiated_t *obj); +/** Clear any errors that were set on the object 'obj' by its setter + * functions. Return true iff errors were cleared. + */ +int circpad_negotiated_clear_errors(circpad_negotiated_t *obj); +/** Return the value of the version field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_version(const circpad_negotiated_t *inp); +/** Set the value of the version field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the command field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_command(const circpad_negotiated_t *inp); +/** Set the value of the command field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. + */ +int circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the response field of the circpad_negotiated_t + * in 'inp' + */ +uint8_t circpad_negotiated_get_response(const circpad_negotiated_t *inp); +/** Set the value of the response field of the circpad_negotiated_t in + * 'inp' to 'val'. Return 0 on success; return -1 and set the error + * code on 'inp' on failure. 
+ */ +int circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val); +/** Return the value of the machine_type field of the + * circpad_negotiated_t in 'inp' + */ +uint8_t circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp); +/** Set the value of the machine_type field of the + * circpad_negotiated_t in 'inp' to 'val'. Return 0 on success; return + * -1 and set the error code on 'inp' on failure. + */ +int circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val); + + +#endif diff --git a/src/trunnel/circpad_negotiation.trunnel b/src/trunnel/circpad_negotiation.trunnel new file mode 100644 index 0000000000..abbc929cc5 --- /dev/null +++ b/src/trunnel/circpad_negotiation.trunnel @@ -0,0 +1,44 @@ +/* These are the padding negotiation commands */ +const CIRCPAD_COMMAND_STOP = 1; +const CIRCPAD_COMMAND_START = 2; + +/* Responses to commands */ +const CIRCPAD_RESPONSE_OK = 1; +const CIRCPAD_RESPONSE_ERR = 2; + +/* Built-in machine types */ + +/* 1) Machine that obscures circuit setup */ +const CIRCPAD_MACHINE_CIRC_SETUP = 1; + +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). */ +struct circpad_negotiate { + u8 version IN [0]; + u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP]; + + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + u8 machine_type; + + /** If true, send a relay_drop reply.. */ + // FIXME-MP-AP: Maybe we just say to transition to the first state + // here instead.. Also what about delay before responding? + u8 echo_request IN [0,1]; +}; + +/** + * This command tells the relay to alter its min and max netflow + * timeout range values, and send padding at that rate (resuming + * if stopped). 
*/ +struct circpad_negotiated { + u8 version IN [0]; + u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP]; + u8 response IN [CIRCPAD_RESPONSE_OK, CIRCPAD_RESPONSE_ERR]; + + /** Machine type is left unbounded because we can specify + * new machines in the consensus */ + u8 machine_type; +}; diff --git a/src/trunnel/include.am b/src/trunnel/include.am index b5db0609a8..4f4f1d3624 100644 --- a/src/trunnel/include.am +++ b/src/trunnel/include.am @@ -11,7 +11,8 @@ TRUNNELINPUTS = \ src/trunnel/link_handshake.trunnel \ src/trunnel/pwbox.trunnel \ src/trunnel/channelpadding_negotiation.trunnel \ - src/trunner/socks5.trunnel + src/trunnel/socks5.trunnel \ + src/trunnel/circpad_negotiation.trunnel TRUNNELSOURCES = \ src/ext/trunnel/trunnel.c \ @@ -23,8 +24,9 @@ TRUNNELSOURCES = \ src/trunnel/hs/cell_introduce1.c \ src/trunnel/hs/cell_rendezvous.c \ src/trunnel/channelpadding_negotiation.c \ - src/trunnel/socks5.c \ - src/trunnel/netinfo.c + src/trunnel/socks5.c \ + src/trunnel/netinfo.c \ + src/trunnel/circpad_negotiation.c TRUNNELHEADERS = \ src/ext/trunnel/trunnel.h \ @@ -39,7 +41,8 @@ TRUNNELHEADERS = \ src/trunnel/hs/cell_rendezvous.h \ src/trunnel/channelpadding_negotiation.h \ src/trunnel/socks5.h \ - src/trunnel/netinfo.h + src/trunnel/netinfo.h \ + src/trunnel/circpad_negotiation.h src_trunnel_libor_trunnel_a_SOURCES = $(TRUNNELSOURCES) src_trunnel_libor_trunnel_a_CPPFLAGS = \ -- cgit v1.2.3-54-g00ecf From 43701e1ebe169a84fe98480bd0aaa09a9eebede6 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 21:00:51 +0000 Subject: Circuit padding machine creation events. These event callbacks allow circuit padding to decide when to attempt to launch and negotiate new padding machines, and when to tear old ones down. 
Co-authored-by: George Kadianakis --- src/core/or/circuitbuild.c | 4 ++++ src/core/or/circuituse.c | 14 ++++++++++++++ src/core/or/connection_edge.c | 5 +++++ src/core/or/relay.c | 4 +++- 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/core/or/circuitbuild.c b/src/core/or/circuitbuild.c index b89ec09a99..2d8bc4d4ad 100644 --- a/src/core/or/circuitbuild.c +++ b/src/core/or/circuitbuild.c @@ -43,6 +43,7 @@ #include "core/or/circuitlist.h" #include "core/or/circuitstats.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/command.h" #include "core/or/connection_edge.h" #include "core/or/connection_or.h" @@ -950,12 +951,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ) crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath); circuit_build_times_handle_completed_hop(circ); + circpad_machine_event_circ_added_hop(circ); + if (hop) { /* Case two: we're on a hop after the first. */ return circuit_send_intermediate_onion_skin(circ, hop); } /* Case three: the circuit is finished. Do housekeeping tasks on it. 
*/ + circpad_machine_event_circ_built(circ); return circuit_build_no_more_hops(circ); } diff --git a/src/core/or/circuituse.c b/src/core/or/circuituse.c index e230ad1005..f8298795e2 100644 --- a/src/core/or/circuituse.c +++ b/src/core/or/circuituse.c @@ -35,6 +35,7 @@ #include "core/or/circuitlist.h" #include "core/or/circuitstats.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/connection_edge.h" #include "core/or/policies.h" #include "feature/client/addressmap.h" @@ -1419,6 +1420,11 @@ circuit_detach_stream(circuit_t *circ, edge_connection_t *conn) if (circ->purpose == CIRCUIT_PURPOSE_S_REND_JOINED) { hs_dec_rdv_stream_counter(origin_circ); } + + /* If there are no more streams on this circ, tell circpad */ + if (!origin_circ->p_streams) + circpad_machine_event_circ_has_no_streams(origin_circ); + return; } } else { @@ -2587,6 +2593,12 @@ link_apconn_to_circ(entry_connection_t *apconn, origin_circuit_t *circ, /* add it into the linked list of streams on this circuit */ log_debug(LD_APP|LD_CIRC, "attaching new conn to circ. 
n_circ_id %u.", (unsigned)circ->base_.n_circ_id); + + /* If this is the first stream on this circuit, tell circpad + * that streams are attached */ + if (!circ->p_streams) + circpad_machine_event_circ_has_streams(circ); + /* reset it, so we can measure circ timeouts */ ENTRY_TO_CONN(apconn)->timestamp_last_read_allowed = time(NULL); ENTRY_TO_EDGE_CONN(apconn)->next_stream = circ->p_streams; @@ -3065,6 +3077,8 @@ circuit_change_purpose(circuit_t *circ, uint8_t new_purpose) if (CIRCUIT_IS_ORIGIN(circ)) { control_event_circuit_purpose_changed(TO_ORIGIN_CIRCUIT(circ), old_purpose); + + circpad_machine_event_circ_purpose_changed(TO_ORIGIN_CIRCUIT(circ)); } } diff --git a/src/core/or/connection_edge.c b/src/core/or/connection_edge.c index 9f76929e53..88be6040fd 100644 --- a/src/core/or/connection_edge.c +++ b/src/core/or/connection_edge.c @@ -67,6 +67,7 @@ #include "core/or/circuitbuild.h" #include "core/or/circuitlist.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "core/or/connection_edge.h" #include "core/or/connection_or.h" #include "core/or/policies.h" @@ -3694,6 +3695,10 @@ handle_hs_exit_conn(circuit_t *circ, edge_connection_t *conn) /* Link the circuit and the connection crypt path. */ conn->cpath_layer = origin_circ->cpath->prev; + /* If this is the first stream on this circuit, tell circpad */ + if (!origin_circ->p_streams) + circpad_machine_event_circ_has_streams(origin_circ); + /* Add it into the linked list of p_streams on this circuit */ conn->next_stream = origin_circ->p_streams; origin_circ->p_streams = conn; diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 2e92f2a55d..679fc4433d 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -602,7 +602,9 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ, * one of them. Don't worry about the conn protocol version: * append_cell_to_circuit_queue will fix it up. 
*/ cell.command = CELL_RELAY_EARLY; - --origin_circ->remaining_relay_early_cells; + /* If we're out of relay early cells, tell circpad */ + if (--origin_circ->remaining_relay_early_cells == 0) + circpad_machine_event_circ_has_no_relay_early(origin_circ); log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.", (int)origin_circ->remaining_relay_early_cells); /* Memorize the command that is sent as RELAY_EARLY cell; helps debug -- cgit v1.2.3-54-g00ecf From 7be71903daff042e606e7a8445a6359100c9f8f5 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 20:03:35 +0000 Subject: Circuit padding cell event callbacks. These callbacks allow the padding state machines to react to various types of sent and received relay cells. Co-authored-by: George Kadianakis --- src/core/or/or.h | 3 +++ src/core/or/relay.c | 25 ++++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/core/or/or.h b/src/core/or/or.h index f297bc9267..bf5e3957ad 100644 --- a/src/core/or/or.h +++ b/src/core/or/or.h @@ -207,6 +207,9 @@ struct curve25519_public_key_t; #define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39 #define RELAY_COMMAND_INTRODUCE_ACK 40 +#define RELAY_COMMAND_PADDING_NEGOTIATE 41 +#define RELAY_COMMAND_PADDING_NEGOTIATED 42 + /* Reasons why an OR connection is closed. 
*/ #define END_OR_CONN_REASON_DONE 1 #define END_OR_CONN_REASON_REFUSED 2 /* connection refused */ diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 679fc4433d..452777b2fc 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -55,6 +55,7 @@ #include "core/or/circuitbuild.h" #include "core/or/circuitlist.h" #include "core/or/circuituse.h" +#include "core/or/circuitpadding.h" #include "lib/compress/compress.h" #include "app/config/config.h" #include "core/mainloop/connection.h" @@ -80,7 +81,6 @@ #include "feature/nodelist/describe.h" #include "feature/nodelist/routerlist.h" #include "core/or/scheduler.h" -#include "feature/stats/rephist.h" #include "core/or/cell_st.h" #include "core/or/cell_queue_st.h" @@ -293,7 +293,9 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ, return 0; } - /* not recognized. pass it on. */ + /* not recognized. inform circpad and pass it on. */ + circpad_deliver_unrecognized_cell_events(circ, cell_direction); + if (cell_direction == CELL_DIRECTION_OUT) { cell->circ_id = circ->n_circ_id; /* switch it */ chan = circ->n_chan; @@ -524,6 +526,7 @@ relay_command_to_string(uint8_t command) case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK"; case RELAY_COMMAND_EXTEND2: return "EXTEND2"; case RELAY_COMMAND_EXTENDED2: return "EXTENDED2"; + case RELAY_COMMAND_PADDING_NEGOTIATE: return "PADDING_NEGOTIATE"; default: tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u", (unsigned)command); @@ -577,8 +580,8 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ, log_debug(LD_OR,"delivering %d cell %s.", relay_command, cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward"); - if (relay_command == RELAY_COMMAND_DROP) - rep_hist_padding_count_write(PADDING_TYPE_DROP); + /* Tell circpad we're sending a relay cell */ + circpad_deliver_sent_relay_cell_events(circ, relay_command); /* If we are sending an END cell and this circuit is used for a tunneled * directory request, advance its state. 
*/ @@ -1483,9 +1486,11 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, } } + /* Tell circpad that we've recieved a recognized cell */ + circpad_deliver_recognized_relay_cell_events(circ, rh.command, layer_hint); + /* either conn is NULL, in which case we've got a control cell, or else * conn points to the recognized stream. */ - if (conn && !connection_state_is_open(TO_CONN(conn))) { if (conn->base_.type == CONN_TYPE_EXIT && (conn->base_.state == EXIT_CONN_STATE_CONNECTING || @@ -1506,8 +1511,14 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ, switch (rh.command) { case RELAY_COMMAND_DROP: - rep_hist_padding_count_read(PADDING_TYPE_DROP); -// log_info(domain,"Got a relay-level padding cell. Dropping."); + /* Already examined in circpad_deliver_recognized_relay_cell_events */ + return 0; + case RELAY_COMMAND_PADDING_NEGOTIATE: + circpad_handle_padding_negotiate(circ, cell); + return 0; + case RELAY_COMMAND_PADDING_NEGOTIATED: + if (circpad_handle_padding_negotiated(circ, cell, layer_hint) == 0) + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length); return 0; case RELAY_COMMAND_BEGIN: case RELAY_COMMAND_BEGIN_DIR: -- cgit v1.2.3-54-g00ecf From 9aaf72ea58bd52d0af694411d16194ea15d612d1 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 19:49:26 +0000 Subject: Circuit padding implementation. This implements all of the event handling, state machines, and padding decisions for circuit padding. I recommend reviewing this after you look at the call-in points into it from the rest of Tor. 
Co-authored-by: George Kadianakis --- src/core/include.am | 2 + src/core/or/circuitpadding.c | 2467 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2469 insertions(+) create mode 100644 src/core/or/circuitpadding.c diff --git a/src/core/include.am b/src/core/include.am index 5e69cb9ada..ae47c75e09 100644 --- a/src/core/include.am +++ b/src/core/include.am @@ -32,6 +32,7 @@ LIBTOR_APP_A_SOURCES = \ src/core/or/circuitlist.c \ src/core/or/circuitmux.c \ src/core/or/circuitmux_ewma.c \ + src/core/or/circuitpadding.c \ src/core/or/circuitstats.c \ src/core/or/circuituse.c \ src/core/or/command.c \ @@ -227,6 +228,7 @@ noinst_HEADERS += \ src/core/or/circuitmux.h \ src/core/or/circuitmux_ewma.h \ src/core/or/circuitstats.h \ + src/core/or/circuitpadding.h \ src/core/or/circuituse.h \ src/core/or/command.h \ src/core/or/connection_edge.h \ diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c new file mode 100644 index 0000000000..9d65e2cf22 --- /dev/null +++ b/src/core/or/circuitpadding.c @@ -0,0 +1,2467 @@ +/* Copyright (c) 2017 The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#include +#include "lib/math/fp.h" +#include "core/or/or.h" +#include "core/or/circuitpadding.h" +#include "core/or/circuitlist.h" +#include "core/or/circuituse.h" +#include "core/or/relay.h" +#include "feature/stats/rephist.h" +#include "feature/nodelist/networkstatus.h" + +#include "core/or/channel.h" + +#include "lib/time/compat_time.h" +#include "lib/crypt_ops/crypto_rand.h" + +#include "core/or/crypt_path_st.h" +#include "core/or/circuit_st.h" +#include "core/or/origin_circuit_st.h" +#include "feature/nodelist/routerstatus_st.h" +#include "feature/nodelist/node_st.h" +#include "core/or/cell_st.h" +#include "core/or/extend_info_st.h" +#include "core/crypto/relay_crypto.h" +#include "feature/nodelist/nodelist.h" + +#include "app/config/config.h" + +/* XXX: This is a dup of the constant in ./src/lib/time/tvdiff.c. 
+ * Should/Do we have a header for time constants like this? */ +#define TOR_USEC_PER_SEC (1000000) + +static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t + circ_purpose); +static inline circpad_circuit_state_t circpad_circuit_state( + origin_circuit_t *circ); +static void circpad_setup_machine_on_circ(circuit_t *on_circ, + const circpad_machine_t *machine); +static double circpad_distribution_sample(circpad_distribution_t dist); + +/** Cached consensus params */ +static uint8_t circpad_global_max_padding_percent; +static uint16_t circpad_global_allowed_cells; + +/** Global cell counts, for rate limiting */ +static uint64_t circpad_global_padding_sent; +static uint64_t circpad_global_nonpadding_sent; + +/** This is the list of circpad_machine_t's parsed from consensus and torrc + * that have origin_side == 1 (ie: are for client side) */ +STATIC smartlist_t *origin_padding_machines = NULL; + +/** This is the list of circpad_machine_t's parsed from consensus and torrc + * that have origin_side == 0 (ie: are for relay side) */ +STATIC smartlist_t *relay_padding_machines = NULL; + +/** Loop over the current padding state machines using loop_var as the + * loop variable. */ +#define FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var) \ + STMT_BEGIN \ + for (int loop_var = 0; loop_var < CIRCPAD_MAX_MACHINES; loop_var++) { +#define FOR_EACH_CIRCUIT_MACHINE_END } STMT_END ; + +/** Loop over the current active padding state machines using loop_var + * as the loop variable. If a machine is not active, skip it. */ +#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(loop_var, circ) \ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var) \ + if (!(circ)->padding_info[loop_var]) \ + continue; +#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END } STMT_END ; + +/** + * Return a human-readable description for a circuit padding state. 
+ */ +static const char * +circpad_state_to_string(circpad_statenum_t state) +{ + const char *descr; + + switch (state) { + case CIRCPAD_STATE_START: + descr = "START"; + break; + case CIRCPAD_STATE_BURST: + descr = "BURST"; + break; + case CIRCPAD_STATE_GAP: + descr = "GAP"; + break; + case CIRCPAD_STATE_END: + descr = "END"; + break; + default: + descr = "CUSTOM"; // XXX: Just return # in static char buf? + } + + return descr; +} + +/** + * Free the machineinfo at an index + */ +static void +circpad_circuit_machineinfo_free_idx(circuit_t *circ, int idx) +{ + if (circ->padding_info[idx]) { + tor_free(circ->padding_info[idx]->histogram); + timer_free(circ->padding_info[idx]->padding_timer); + tor_free(circ->padding_info[idx]); + } +} + +/** Free all the machineinfos in circ that match machine_num. */ +static void +free_circ_machineinfos_with_machine_num(circuit_t *circ, int machine_num) +{ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + if (circ->padding_machine[i] && + circ->padding_machine[i]->machine_num == machine_num) { + circpad_circuit_machineinfo_free_idx(circ, i); + circ->padding_machine[i] = NULL; + } + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Free all padding machines and mutable info associated with circuit + */ +void +circpad_circuit_free_all_machineinfos(circuit_t *circ) +{ + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + circpad_circuit_machineinfo_free_idx(circ, i); + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Allocate a new mutable machineinfo structure. + */ +STATIC circpad_machineinfo_t * +circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) +{ + circpad_machineinfo_t *mi = tor_malloc_zero(sizeof(circpad_machineinfo_t)); + mi->machine_index = machine_index; + mi->on_circ = on_circ; + + return mi; +} + +/** + * Return the circpad_state_t for the current state based on the + * mutable info. + * + * This function returns NULL when the machine is in the end state or in an + * invalid state. 
+ */
+STATIC const circpad_state_t *
+circpad_machine_current_state(circpad_machineinfo_t *mi)
+{
+  const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  if (mi->current_state == CIRCPAD_STATE_END) {
+    return NULL;
+  } else if (BUG(mi->current_state >= machine->num_states)) {
+    log_fn(LOG_WARN,LD_CIRC,
+           "Invalid circuit padding state %d",
+           mi->current_state);
+
+    return NULL;
+  }
+
+  return &machine->states[mi->current_state];
+}
+
+/**
+ * Calculate the lower bound of a histogram bin. The upper bound
+ * is obtained by calling this function with bin+1, and subtracting 1.
+ *
+ * The 0th bin has a special value -- it only represents start_usec.
+ * This is so we can specify a probability on 0-delay values.
+ *
+ * After bin 0, bins are exponentially spaced, so that each subsequent
+ * bin is twice as large as the previous. This is done so that higher
+ * time resolution is given to lower time values.
+ *
+ * The infinity bin is the last bin in the array (histogram_len-1).
+ * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX).
+ *
+ * Returns CIRCPAD_DELAY_INFINITE if the machine has no current state.
+ */
+STATIC circpad_delay_t
+circpad_histogram_bin_to_usec(circpad_machineinfo_t *mi,
+                              circpad_hist_index_t bin)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (bin >= CIRCPAD_INFINITY_BIN(state))
+    return CIRCPAD_DELAY_INFINITE;
+
+  if (bin == 0)
+    return start_usec;
+
+  if (bin == 1)
+    return start_usec+1;
+
+  /* The bin widths double every index, so that we can have more resolution
+   * for lower time values in the histogram. */
+  const int bin_shift = CIRCPAD_INFINITY_BIN(state) - bin;
+
+  /* Build the power-of-two divisor in circpad_time_t rather than int:
+   * "1 << n" is an int shift, which is undefined once n reaches the width
+   * of int. When the shift would not fit in circpad_time_t either, the
+   * divisor is larger than any realistic range_usec, so the offset from
+   * start_usec rounds down to zero and the bin starts at start_usec.
+   * NOTE(review): assumes range_usec is no wider than circpad_time_t and
+   * stays below its top bit -- confirm against circuitpadding.h. */
+  if (bin_shift >= (int)(sizeof(circpad_time_t)*8) - 1)
+    return start_usec;
+
+  const circpad_time_t bin_width_exponent = ((circpad_time_t)1) << bin_shift;
+  return (circpad_delay_t)MIN(start_usec +
+                              state->range_usec/bin_width_exponent,
+                              CIRCPAD_DELAY_INFINITE);
+}
+
+/**
+ * Return the bin that contains the usec argument.
+ * "Contains" is defined as us in [lower, upper).
+ *
+ * This function will never return the infinity bin (histogram_len-1),
+ * in order to simplify the rest of the code.
+ *
+ * This means that technically the last bin (histogram_len-2)
+ * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE].
+ */
+STATIC circpad_hist_index_t
+circpad_histogram_usec_to_bin(const circpad_machineinfo_t *mi,
+                              circpad_delay_t usec)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+  int32_t bin; /* Larger than return type to properly clamp overflow */
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return 0;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (usec <= start_usec)
+    return 0;
+
+  if (usec == start_usec+1)
+    return 1;
+
+  const circpad_time_t histogram_range_usec = state->range_usec;
+  /* We need to find the bin corresponding to our position in the range.
+   * Since bins are exponentially spaced in powers of two, we need to
+   * take the log2 of our position in histogram_range_usec. However,
+   * since tor_log2() returns the floor(log2(u64)), we have to adjust
+   * it to behave like ceil(log2(u64)). This is verified in our tests
+   * to properly invert the operation done in
+   * circpad_histogram_bin_to_usec(). */
+  bin = CIRCPAD_INFINITY_BIN(state) -
+    tor_log2(2*histogram_range_usec/(usec-start_usec+1));
+
+  /* Clamp the return value to account for timevals before the start
+   * of bin 0, or after the last bin. Don't return the infinity bin
+   * index.
*/ + bin = MIN(MAX(bin, 1), CIRCPAD_INFINITY_BIN(state)-1); + return bin; +} + +/** + * This function frees any token bins allocated from a previous state + * + * Called after a state transition, or if the bins are empty. + */ +STATIC void +circpad_machine_setup_tokens(circpad_machineinfo_t *mi) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + + /* If this state doesn't exist, or doesn't have token removal, + * free any previous state's histogram, and bail */ + if (!state || state->token_removal == CIRCPAD_TOKEN_REMOVAL_NONE) { + if (mi->histogram) { + tor_free(mi->histogram); + mi->histogram = NULL; + mi->histogram_len = 0; + } + return; + } + + /* Try to avoid re-mallocing if we don't really need to */ + if (!mi->histogram || (mi->histogram + && mi->histogram_len != state->histogram_len)) { + tor_free(mi->histogram); // null ok + mi->histogram = tor_malloc_zero(sizeof(circpad_hist_token_t) + *state->histogram_len); + } + mi->histogram_len = state->histogram_len; + + memcpy(mi->histogram, state->histogram, + sizeof(circpad_hist_token_t)*state->histogram_len); +} + +/** + * Choose a length for this state (in cells), if specified. + */ +static void +circpad_choose_state_length(circpad_machineinfo_t *mi) +{ + const circpad_state_t *state = circpad_machine_current_state(mi); + double length; + + if (!state || state->length_dist.type == CIRCPAD_DIST_NONE) { + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + return; + } + + length = circpad_distribution_sample(state->length_dist); + length = MAX(0, length); + length += state->start_length; + length = MIN(length, state->max_length); + + mi->state_length = clamp_double_to_int64(length); +} + +/** + * Sample a value from our iat_dist, and clamp it safely + * to circpad_delay_t. 
+ */
+static circpad_delay_t
+circpad_distribution_sample_iat_delay(const circpad_state_t *state,
+                                      circpad_delay_t start_usec)
+{
+  double val = circpad_distribution_sample(state->iat_dist);
+  /* These comparisons are safe, because the output is in the range
+   * [0, 2**32), and double has a precision of 53 bits. */
+  val = MAX(0, val);
+  val = MIN(val, state->range_usec);
+
+  /* This addition is exact: val is at most 2**32-1, start_usec
+   * is at most 2**32-1, and doubles have a precision of 53 bits. */
+  val += start_usec;
+
+  /* Clamp the distribution at infinite delay val */
+  return (circpad_delay_t)MIN(tor_llround(val), CIRCPAD_DELAY_INFINITE);
+}
+
+/**
+ * Sample an expected time-until-next-packet delay from the histogram.
+ *
+ * If the state specifies an IAT distribution, the delay is sampled from
+ * that distribution instead of the histogram.
+ *
+ * Otherwise the bin is chosen with probability proportional to the number
+ * of tokens in each bin, and then a time value is chosen uniformly from
+ * that bin's [start,end) time range.
+ *
+ * Returns CIRCPAD_DELAY_INFINITE (meaning: do not schedule padding) when
+ * the infinity bin is chosen, when the histogram has no tokens left, or
+ * when a sanity check on the histogram fails.
+ */
+static circpad_delay_t
+circpad_machine_sample_delay(circpad_machineinfo_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  const circpad_hist_token_t *histogram = NULL;
+  circpad_hist_index_t curr_bin = 0;
+  circpad_delay_t bin_start, bin_end;
+  circpad_delay_t start_usec;
+  /* These three must all be larger than circpad_hist_token_t, because
+   * we sum several circpad_hist_token_t values across the histogram */
+  uint64_t curr_weight = 0;
+  uint64_t histogram_total_tokens = 0;
+  uint64_t bin_choice;
+
+  tor_assert(state);
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (state->iat_dist.type != CIRCPAD_DIST_NONE) {
+    /* Sample from a fixed IAT distribution and return */
+    return circpad_distribution_sample_iat_delay(state, start_usec);
+  } else if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    /* We have a mutable histogram. Do basic sanity check and apply: */
+    if (BUG(!mi->histogram) ||
+        BUG(mi->histogram_len != state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+
+    histogram = mi->histogram;
+    for (circpad_hist_index_t b = 0; b < state->histogram_len; b++)
+      histogram_total_tokens += histogram[b];
+  } else {
+    /* We have a histogram, but it's immutable */
+    histogram = state->histogram;
+    histogram_total_tokens = state->histogram_total_tokens;
+  }
+
+  /* If we are out of tokens, don't schedule padding: there is no bin to
+   * choose from, and an all-zero histogram would send the zero-bin scan
+   * below past the end of the array. */
+  if (histogram_total_tokens == 0) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  bin_choice = crypto_rand_uint64(histogram_total_tokens);
+
+  /* Skip all the initial zero bins, without leaving the histogram */
+  while (curr_bin < state->histogram_len && !histogram[curr_bin]) {
+    curr_bin++;
+  }
+  /* A nonzero total with no nonzero bin means the total and the
+   * histogram contents disagree. */
+  if (BUG(curr_bin >= state->histogram_len)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+  curr_weight = histogram[curr_bin];
+
+  // TODO: This is not constant-time. Pretty sure we don't
+  // really need it to be, though.
+  while (curr_weight < bin_choice) {
+    curr_bin++;
+    /* It should be impossible to run past the end of the histogram */
+    if (BUG(curr_bin >= state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+    curr_weight += histogram[curr_bin];
+  }
+
+  /* Do some basic checking of the current bin we are in */
+  if (BUG(curr_bin >= state->histogram_len) ||
+      BUG(histogram[curr_bin] == 0)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  // Store this index to remove the token upon callback.
+  if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    mi->chosen_bin = curr_bin;
+  }
+
+  if (curr_bin >= CIRCPAD_INFINITY_BIN(state)) {
+    if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE &&
+        mi->histogram[curr_bin] > 0) {
+      mi->histogram[curr_bin]--;
+    }
+
+    // Infinity: Don't send a padding packet. Wait for a real packet
+    // and then see if our bins are empty or what else we should do.
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  tor_assert(curr_bin < CIRCPAD_INFINITY_BIN(state));
+
+  bin_start = circpad_histogram_bin_to_usec(mi, curr_bin);
+  bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1);
+
+  /* Truncate the high bin in case it's the infinity bin:
+   * Don't actually schedule an "infinite"-1 delay */
+  bin_end = MIN(bin_end, start_usec+state->range_usec);
+
+  // Sample uniformly between histogram[i] to histogram[i+1]-1,
+  // but no need to sample if they are the same timeval (aka bin 0 or bin 1).
+  if (bin_end <= bin_start+1)
+    return bin_start;
+  else
+    return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end);
+}
+
+/**
+ * Sample a value from the specified probability distribution.
+ *
+ * This performs inverse transform sampling
+ * (https://en.wikipedia.org/wiki/Inverse_transform_sampling).
+ *
+ * XXX: These formulas were taken verbatim. Need a floating wizard
+ * to check them for catastrophic cancellation and other issues (teor?).
+ * Also: is 32bits of double from [0.0,1.0) enough?
+ */
+static double
+circpad_distribution_sample(circpad_distribution_t dist)
+{
+  double p = 0;
+
+  switch (dist.type) {
+    case CIRCPAD_DIST_NONE:
+      return 0;
+    case CIRCPAD_DIST_UNIFORM:
+      p = crypto_rand_double();
+      // param2 is upper bound, param1 is lower
+      /* The subtraction is exact as long as param2 and param1 are less than
+       * 2**53. The multiplication is accurate as long as (param2 - param1)
+       * is less than 2**52. (And when they are large, the low bits aren't
+       * important.) The result covers the full range of outputs, as long as
+       * p has a resolution of 1/2**32 or greater. */
+      p *= (dist.param2 - dist.param1);
+      p += dist.param1;
+      return p;
+    case CIRCPAD_DIST_LOGISTIC:
+      p = crypto_rand_double();
+      /* https://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function
+       * param1 is Mu, param2 is s.
*/ + if (p <= 0.0) // Avoid log(0) + return 0; + return dist.param1 + dist.param2*tor_mathlog(p/(1.0-p)); + case CIRCPAD_DIST_LOG_LOGISTIC: + p = crypto_rand_double(); + /* https://en.wikipedia.org/wiki/Log-logistic_distribution#Quantiles + * param1 is Alpha, param2 is Beta */ + return dist.param1 * pow(p/(1.0-p), 1.0/dist.param2); + case CIRCPAD_DIST_GEOMETRIC: + p = crypto_rand_double(); + /* https://github.com/distributions-io/geometric-quantile/ + * param1 is 'p' (success probability) */ + return ceil(tor_mathlog(1.0-p)/tor_mathlog(1.0-dist.param1)); + case CIRCPAD_DIST_WEIBULL: + p = crypto_rand_double(); + /* https://en.wikipedia.org/wiki/Weibull_distribution \ + * #Cumulative_distribution_function + * param1 is k, param2 is Lambda */ + return dist.param2*pow(-tor_mathlog(1.0-p), 1.0/dist.param1); + case CIRCPAD_DIST_PARETO: + p = 1.0-crypto_rand_double(); // Pareto quantile needs (0,1] + + /* https://en.wikipedia.org/wiki/Generalized_Pareto_distribution \ + * #Generating_generalized_Pareto_random_variables + * param1 is Sigma, param2 is Xi + * Since it's piecewise, we must define it for 0 (or close to 0) */ + if (fabs(dist.param2) <= 1e-22) + return -dist.param1*tor_mathlog(p); + else + return dist.param1*(pow(p, -dist.param2) - 1.0)/dist.param2; + } + return 0; +} + +/** + * Find the index of the first bin whose upper bound is + * greater than the target, and that has tokens remaining. 
+ */ +static circpad_hist_index_t +circpad_machine_first_higher_index(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, + target_bin_usec); + + /* Don't remove from the infinity bin */ + for (; bin < CIRCPAD_INFINITY_BIN(mi); bin++) { + if (mi->histogram[bin] && + circpad_histogram_bin_to_usec(mi, bin+1) > target_bin_usec) { + return bin; + } + } + + return mi->histogram_len; +} + +/** + * Find the index of the first bin whose lower bound is lower or equal to + * target_bin_usec, and that still has tokens remaining. + */ +static circpad_hist_index_t +circpad_machine_first_lower_index(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_usec) +{ + circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, + target_bin_usec); + + for (; bin >= 0; bin--) { + if (mi->histogram[bin] && + circpad_histogram_bin_to_usec(mi, bin) <= target_bin_usec) { + return bin; + } + } + + return -1; +} + +/** + * Remove a token from the first non-empty bin whose upper bound is + * greater than the target. + */ +STATIC void +circpad_machine_remove_higher_token(circpad_machineinfo_t *mi, + circpad_delay_t target_bin_usec) +{ + /* We need to remove the token from the first bin + * whose upper bound is greater than the target, and that + * has tokens remaining. */ + circpad_hist_index_t bin = circpad_machine_first_higher_index(mi, + target_bin_usec); + + if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) { + if (!BUG(mi->histogram[bin] == 0)) { + mi->histogram[bin]--; + } + } +} + +/** + * Remove a token from the first non-empty bin whose upper bound is + * lower than the target. 
+ */
+STATIC void
+circpad_machine_remove_lower_token(circpad_machineinfo_t *mi,
+                                   circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_machine_first_lower_index(mi,
+                                                     target_bin_usec);
+
+  if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) {
+    if (!BUG(mi->histogram[bin] == 0)) {
+      mi->histogram[bin]--;
+    }
+  }
+}
+
+/* Helper macro: Ensure that the bin has tokens available, and BUG out of the
+ * function if it's not the case. */
+#define ENSURE_BIN_CAPACITY(bin_index) \
+  if (BUG(mi->histogram[bin_index] == 0)) { \
+    return; \
+  }
+
+/**
+ * Remove a token from the closest non-empty bin to the target.
+ *
+ * The candidates are the nearest non-empty bin at or below the target and
+ * the nearest non-empty bin at or above it. If only one of them exists,
+ * the token is removed from that one; if neither exists, nothing is
+ * removed.
+ *
+ * If use_usec is true, measure "closest" by comparing the target against
+ * the midpoints (in usec) of the two candidate bins.
+ * If it is false, use bin index distance only.
+ * In both modes a tie goes to the lower bin.
+ */
+STATIC void
+circpad_machine_remove_closest_token(circpad_machineinfo_t *mi,
+                                     circpad_delay_t target_bin_usec,
+                                     bool use_usec)
+{
+  circpad_hist_index_t lower, higher, current;
+  circpad_hist_index_t bin_to_remove = -1;
+
+  lower = circpad_machine_first_lower_index(mi, target_bin_usec);
+  higher = circpad_machine_first_higher_index(mi, target_bin_usec);
+  current = circpad_histogram_usec_to_bin(mi, target_bin_usec);
+
+  /* Sanity check the results */
+  if (BUG(lower > current) || BUG(higher < current)) {
+    return;
+  }
+
+  if (higher == mi->histogram_len && lower == -1) {
+    // Bins are empty
+    return;
+  } else if (higher == mi->histogram_len) {
+    /* All higher bins are empty */
+    ENSURE_BIN_CAPACITY(lower);
+    mi->histogram[lower]--;
+    return;
+  } else if (lower == -1) {
+    /* All lower bins are empty */
+    ENSURE_BIN_CAPACITY(higher);
+    mi->histogram[higher]--;
+    return;
+  }
+
+  if (use_usec) {
+    /* Find the closest bin midpoint to the target */
+    circpad_delay_t lower_usec = circpad_get_histogram_bin_midpoint(mi, lower);
+    circpad_delay_t higher_usec =
+      circpad_get_histogram_bin_midpoint(mi, higher);
+
+    if (target_bin_usec < lower_usec) {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    } else if (target_bin_usec > higher_usec) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else if (target_bin_usec-lower_usec > higher_usec-target_bin_usec) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    }
+
+    /* Actually take the token out of the bin selected above. Without
+     * this, the usec-based strategy picks a bin but never removes
+     * anything from it. */
+    mi->histogram[bin_to_remove]--;
+    return;
+  } else {
+    if (current - lower > higher - current) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      mi->histogram[higher]--;
+      return;
+    } else {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      mi->histogram[lower]--;
+      return;
+    }
+  }
+}
+
+#undef ENSURE_BIN_CAPACITY
+
+/**
+ * Remove a token from the exact bin corresponding to the target.
+ *
+ * If it is empty, do nothing.
+ */
+static void
+circpad_machine_remove_exact(circpad_machineinfo_t *mi,
+                             circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi,
+                                                           target_bin_usec);
+
+  if (mi->histogram[bin] > 0)
+    mi->histogram[bin]--;
+}
+
+/**
+ * Check our state's cell limit count and tokens.
+ *
+ * Returns 1 if either limits are hit and we decide to change states,
+ * otherwise returns 0.
+ */
+static circpad_decision_t
+check_machine_token_supply(circpad_machineinfo_t *mi)
+{
+  uint32_t histogram_total_tokens = 0;
+
+  /* Check if bins empty. This requires summing up the current mutable
+   * machineinfo histogram token total and checking if it is zero.
+   * Machineinfo does not keep a running token count. We're assuming the
+   * extra space is not worth this short loop iteration.
+   *
+   * We also do not count infinity bin in histogram totals.
+ */ + if (mi->histogram_len && mi->histogram) { + for (circpad_hist_index_t b = 0; b < CIRCPAD_INFINITY_BIN(mi); b++) + histogram_total_tokens += mi->histogram[b]; + + /* If we change state, we're done */ + if (histogram_total_tokens == 0) { + if (circpad_internal_event_bins_empty(mi) == CIRCPAD_STATE_CHANGED) + return CIRCPAD_STATE_CHANGED; + } + } + + if (mi->state_length == 0) { + return circpad_internal_event_state_length_up(mi); + } + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * Remove a token from the bin corresponding to the delta since + * last packet. If that bin is empty, choose a token based on + * the specified removal strategy in the state machine. + * + * This function also updates and checks rate limit and state + * limit counters. + * + * Returns 1 if we transition states, 0 otherwise. + */ +circpad_decision_t +circpad_machine_remove_token(circpad_machineinfo_t *mi) +{ + const circpad_state_t *state = NULL; + circpad_time_t current_time; + circpad_delay_t target_bin_usec; + + /* Update non-padding counts for rate limiting: We scale at UINT16_MAX + * because we only use this for a percentile limit of 2 sig figs, and + * space is scare in the machineinfo struct. */ + mi->nonpadding_sent++; + if (mi->nonpadding_sent == UINT16_MAX) { + mi->padding_sent /= 2; + mi->nonpadding_sent /= 2; + } + + /* Dont remove any tokens if there was no padding scheduled */ + if (!mi->padding_scheduled_at_usec) { + return CIRCPAD_STATE_UNCHANGED; + } + + state = circpad_machine_current_state(mi); + current_time = monotime_absolute_usec(); + + /* If we have scheduled padding some time in the future, we want to see what + bin we are in at the current time */ + target_bin_usec = (circpad_delay_t) + MIN((current_time - mi->padding_scheduled_at_usec), + CIRCPAD_DELAY_INFINITE-1); + + /* We are treating this non-padding cell as a padding cell, so we cancel + padding timer, if present. 
*/ + mi->padding_scheduled_at_usec = 0; + if (mi->is_padding_timer_scheduled) { + mi->is_padding_timer_scheduled = 0; + timer_disable(mi->padding_timer); + } + + /* If we are not in a padding state (like start or end), we're done */ + if (!state) + return CIRCPAD_STATE_UNCHANGED; + + /* If we're enforcing a state length on non-padding packets, + * decrement it */ + if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE && + state->length_includes_nonpadding && + mi->state_length > 0) { + mi->state_length--; + } + + /* Perform the specified token removal strategy */ + switch (state->token_removal) { + case CIRCPAD_TOKEN_REMOVAL_NONE: + break; + case CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC: + circpad_machine_remove_closest_token(mi, target_bin_usec, 1); + break; + case CIRCPAD_TOKEN_REMOVAL_CLOSEST: + circpad_machine_remove_closest_token(mi, target_bin_usec, 0); + break; + case CIRCPAD_TOKEN_REMOVAL_LOWER: + circpad_machine_remove_lower_token(mi, target_bin_usec); + break; + case CIRCPAD_TOKEN_REMOVAL_HIGHER: + circpad_machine_remove_higher_token(mi, target_bin_usec); + break; + case CIRCPAD_TOKEN_REMOVAL_EXACT: + circpad_machine_remove_exact(mi, target_bin_usec); + break; + } + + /* Check our token and state length limits */ + return check_machine_token_supply(mi); +} + +/** + * Send a relay command with a relay cell payload on a circuit to + * the particular hopnum. + * + * Hopnum starts at 1 (1=guard, 2=middle, 3=exit, etc). + * + * Payload may be null. + * + * Returns negative on error, 0 on success. 
+ */ +static signed_error_t +circpad_send_command_to_hop(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len) +{ + crypt_path_t *target_hop = circuit_get_cpath_hop(circ, hopnum); + signed_error_t ret; + + /* Check that the cpath has the target hop */ + if (!target_hop) { + log_fn(LOG_WARN, LD_BUG, "Padding circuit %u has %d hops, not %d", + circ->global_identifier, circuit_get_cpath_len(circ), hopnum); + return -1; + } + + /* Check that the target hop is opened */ + if (target_hop->state != CPATH_STATE_OPEN) { + log_fn(LOG_WARN,LD_CIRC, + "Padding circuit %u has %d hops, not %d", + circ->global_identifier, + circuit_get_cpath_opened_len(circ), hopnum); + return -1; + } + + /* Send the drop command to the second hop */ + ret = relay_send_command_from_edge(0, TO_CIRCUIT(circ), relay_command, + (const char*)payload, payload_len, + target_hop); + return ret; +} + +/** + * Callback helper to send a padding cell. + * + * This helper is called after our histogram-sampled delay period passes + * without another packet being sent first. If a packet is sent before this + * callback happens, it is canceled. So when we're called here, send padding + * right away. + * + * If sending this padding cell forced us to transition states return + * CIRCPAD_STATE_CHANGED. Otherwise return CIRCPAD_STATE_UNCHANGED. + */ +circpad_decision_t +circpad_send_padding_cell_for_callback(circpad_machineinfo_t *mi) +{ + circuit_t *circ = mi->on_circ; + int machine_idx = mi->machine_index; + mi->padding_scheduled_at_usec = 0; + circpad_statenum_t state = mi->current_state; + + // Make sure circuit didn't close on us + if (mi->on_circ->marked_for_close) { + log_fn(LOG_INFO,LD_CIRC, + "Padding callback on a circuit marked for close. 
Ignoring."); + return CIRCPAD_STATE_CHANGED; + } + + /* If it's a histogram, reduce the token count */ + if (mi->histogram && mi->histogram_len) { + /* Basic sanity check on the histogram before removing anything */ + if (BUG(mi->chosen_bin >= mi->histogram_len) || + BUG(mi->histogram[mi->chosen_bin] == 0)) { + return CIRCPAD_STATE_CHANGED; + } + + mi->histogram[mi->chosen_bin]--; + } + + /* If we have a valid state length bound, consider it */ + if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE && + !BUG(mi->state_length <= 0)) { + mi->state_length--; + } + + /* + * Update padding counts for rate limiting: We scale at UINT16_MAX + * because we only use this for a percentile limit of 2 sig figs, and + * space is scarce in the machineinfo struct. + */ + mi->padding_sent++; + if (mi->padding_sent == UINT16_MAX) { + mi->padding_sent /= 2; + mi->nonpadding_sent /= 2; + } + circpad_global_padding_sent++; + + if (CIRCUIT_IS_ORIGIN(mi->on_circ)) { + circpad_send_command_to_hop(TO_ORIGIN_CIRCUIT(mi->on_circ), + CIRCPAD_GET_MACHINE(mi)->target_hopnum, + RELAY_COMMAND_DROP, NULL, 0); + log_fn(LOG_INFO,LD_CIRC, "Callback: Sending padding to origin circuit %u.", + TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier); + } else { + // If we're a non-origin circ, we can just send from here as if we're the + // edge. + log_fn(LOG_INFO,LD_CIRC, + "Callback: Sending padding to non-origin circuit."); + relay_send_command_from_edge(0, mi->on_circ, RELAY_COMMAND_DROP, NULL, + 0, NULL); + } + + rep_hist_padding_count_write(PADDING_TYPE_DROP); + /* This is a padding cell sent from the client or from the middle node, + * (because it's invoked from circuitpadding.c) */ + circpad_cell_event_padding_sent(circ); + + /* The circpad_cell_event_padding_sent() could cause us to transition. + * Check that we still have a padding machineinfo, and then check our token + * supply.
*/ + if (circ->padding_info[machine_idx] != NULL) { + if (state != circ->padding_info[machine_idx]->current_state) + return CIRCPAD_STATE_CHANGED; + else + return check_machine_token_supply(circ->padding_info[machine_idx]); + } else { + return CIRCPAD_STATE_CHANGED; + } +} + +/** + * Tor-timer compatible callback that tells us to send a padding cell. + * + * Timers are associated with circpad_machineinfo_t's. When the machineinfo + * is freed on a circuit, the timers are cancelled. Since the lifetime + * of machineinfo is always longer than the timers, handles are not + * needed. + */ +static void +circpad_send_padding_callback(tor_timer_t *timer, void *args, + const struct monotime_t *time) +{ + circpad_machineinfo_t *mi = ((circpad_machineinfo_t*)args); + (void)timer; (void)time; + + if (mi && mi->on_circ) { + assert_circuit_ok(mi->on_circ); + circpad_send_padding_cell_for_callback(mi); + } else { + // This shouldn't happen (represents a timer leak) + log_fn(LOG_WARN,LD_CIRC, + "Circuit closed while waiting for padding timer."); + tor_fragile_assert(); + } + + // TODO-MP-AP: Unify this counter with channelpadding for rephist stats + //total_timers_pending--; +} + +/** + * Cache our consensus parameters upon consensus update. + */ +void +circpad_new_consensus_params(const networkstatus_t *ns) +{ + circpad_global_allowed_cells = + networkstatus_get_param(ns, "circpad_global_allowed_cells", + 0, 0, UINT16_MAX-1); + + circpad_global_max_padding_percent = + networkstatus_get_param(ns, "circpad_global_max_padding_pct", + 0, 0, 100); +} + +/** + * Check this machine against its padding limits, as well as global + * consensus limits. + * + * We have two limits: a percent and a cell count. The cell count + * limit must be reached before the percent is enforced (this is to + * optionally allow very light padding of things like circuit setup + * while there is no other traffic on the circuit). + * + * TODO: Don't apply limits to machines from torrc.
+ * + * Returns 1 if limits are set and we've hit them. Otherwise returns 0. + */ +static bool +circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) +{ + const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); + + /* If the machine's max_padding_percent is non-zero, and we've sent + * at least the allowed count of padding cells, then check our + * percent limits for this machine. */ + if (machine->max_padding_percent && + mi->padding_sent >= machine->allowed_padding_count) { + uint32_t total_cells = mi->padding_sent + mi->nonpadding_sent; + + /* Check the percent */ + if ((100*(uint32_t)mi->padding_sent) / total_cells > + machine->max_padding_percent) { + return 1; // limit is reached. Stop. + } + } + + /* If circpad_global_max_padding_percent is non-zero, and we've + * sent at least the global padding cell limit, then check our + * global tor process percentage limit on padding. */ + if (circpad_global_max_padding_percent && + circpad_global_padding_sent >= circpad_global_allowed_cells) { + uint64_t total_cells = circpad_global_padding_sent + + circpad_global_nonpadding_sent; + + /* Check the percent */ + if ((100*circpad_global_padding_sent) / total_cells > + circpad_global_max_padding_percent) { + return 1; // global limit reached. Stop. + } + } + + return 0; // All good! +} + +/** + * Schedule the next padding time according to the machineinfo on a + * circuit. + * + * The histograms represent inter-packet-delay. Whenever you get a packet + * event you should be scheduling your next timer (after cancelling any old + * ones and updating tokens accordingly). + * + * Returns 1 if we decide to transition states (due to infinity bin), + * 0 otherwise. + */ +circpad_decision_t +circpad_machine_schedule_padding(circpad_machineinfo_t *mi) +{ + circpad_delay_t in_usec = 0; + struct timeval timeout; + tor_assert(mi); + + // Don't pad in end (but also don't cancel any previously + // scheduled padding either).
+ if (mi->current_state == CIRCPAD_STATE_END) { + log_fn(LOG_INFO, LD_CIRC, "Padding end state"); + return CIRCPAD_STATE_UNCHANGED; + } + + /* Check our padding limits */ + if (circpad_machine_reached_padding_limit(mi)) { + if (CIRCUIT_IS_ORIGIN(mi->on_circ)) { + log_fn(LOG_INFO, LD_CIRC, + "Padding machine has reached padding limit on circuit %u", + TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier); + } else { + log_fn(LOG_INFO, LD_CIRC, + "Padding machine has reached padding limit on circuit %"PRIu64 + ", %d", + mi->on_circ->n_chan ? mi->on_circ->n_chan->global_identifier : 0, + mi->on_circ->n_circ_id); + } + return CIRCPAD_STATE_UNCHANGED; + } + + if (mi->is_padding_timer_scheduled) { + /* Cancel current timer (if any) */ + timer_disable(mi->padding_timer); + mi->is_padding_timer_scheduled = 0; + } + + /* in_usec = in microseconds */ + in_usec = circpad_machine_sample_delay(mi); + mi->padding_scheduled_at_usec = monotime_absolute_usec(); + log_fn(LOG_INFO,LD_CIRC,"\tPadding in %u usec", in_usec); + + // Don't schedule if we have infinite delay. + if (in_usec == CIRCPAD_DELAY_INFINITE) { + return circpad_internal_event_infinity(mi); + } + + if (mi->state_length == 0) { + /* If we're at length 0, that means we hit 0 after sending + * a cell earlier, and emitted an event for it, but + * for whatever reason we did not decide to change states then. + * So maybe the machine is waiting for bins empty, or for an + * infinity event later? That would be a strange machine, + * but there's no reason to make it impossible. 
*/ + return CIRCPAD_STATE_UNCHANGED; + } + + if (in_usec <= 0) { + return circpad_send_padding_cell_for_callback(mi); + } + + timeout.tv_sec = in_usec/TOR_USEC_PER_SEC; + timeout.tv_usec = (in_usec%TOR_USEC_PER_SEC); + + log_fn(LOG_INFO, LD_CIRC, "\tPadding in %u sec, %u usec", + (unsigned)timeout.tv_sec, (unsigned)timeout.tv_usec); + + if (mi->padding_timer) { + timer_set_cb(mi->padding_timer, circpad_send_padding_callback, mi); + } else { + mi->padding_timer = + timer_new(circpad_send_padding_callback, mi); + } + timer_schedule(mi->padding_timer, &timeout); + mi->is_padding_timer_scheduled = 1; + + // TODO-MP-AP: Unify with channelpadding counter + //rep_hist_padding_count_timers(++total_timers_pending); + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * If the machine transitioned to the END state, we need + * to check to see if it wants us to shut it down immediately. + * If it does, then we need to send the appropriate negotiation commands + * depending on which side it is. + * + * After this function is called, mi may point to freed memory. Do + * not access it. + */ +static void +circpad_machine_transitioned_to_end(circpad_machineinfo_t *mi) +{ + const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); + + /* + * We allow machines to shut down and delete themselves as opposed + * to just going back to START or waiting forever in END so that + * we can handle the case where this machine started while it was + * the only machine that matched conditions, but *since* then more + * "higher ranking" machines now match the conditions, and would + * be given a chance to take precedence over this one in + * circpad_add_matching_machines(). + * + * Returning to START or waiting forever in END would not give those + * other machines a chance to be launched, whereas shutting down + * here does.
+ */ + if (machine->should_negotiate_end) { + circuit_t *on_circ = mi->on_circ; + if (machine->is_origin_side) { + /* We free the machine info here so that we can be replaced + * by a different machine. But we must leave the padding_machine + * in place to wait for the negotiated response */ + circpad_circuit_machineinfo_free_idx(on_circ, + machine->machine_index); + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(on_circ), + machine->machine_num, + machine->target_hopnum, + CIRCPAD_COMMAND_STOP); + } else { + circpad_circuit_machineinfo_free_idx(on_circ, + machine->machine_index); + circpad_padding_negotiated(on_circ, + machine->machine_num, + CIRCPAD_COMMAND_STOP, + CIRCPAD_RESPONSE_OK); + on_circ->padding_machine[machine->machine_index] = NULL; + } + } +} + +/** + * Generic state transition function for padding state machines. + * + * Given an event and our mutable machine info, decide if/how to + * transition to a different state, and perform actions accordingly. + * + * Returns 1 if we transition states, 0 otherwise. + */ +circpad_decision_t +circpad_machine_transition(circpad_machineinfo_t *mi, + circpad_event_t event) +{ + const circpad_state_t *state = + circpad_machine_current_state(mi); + + /* If state is null we are in the end state. */ + if (!state) { + /* If we in end state we don't pad no matter what. 
*/ + return CIRCPAD_STATE_UNCHANGED; + } + + /* Check if this event is ignored or causes a cancel */ + if (state->next_state[event] == CIRCPAD_STATE_IGNORE) { + return CIRCPAD_STATE_UNCHANGED; + } else if (state->next_state[event] == CIRCPAD_STATE_CANCEL) { + /* Check cancel events and cancel any pending padding */ + mi->padding_scheduled_at_usec = 0; + if (mi->is_padding_timer_scheduled) { + mi->is_padding_timer_scheduled = 0; + /* Cancel current timer (if any) */ + timer_disable(mi->padding_timer); + } + return CIRCPAD_STATE_UNCHANGED; + } else { + circpad_statenum_t s = state->next_state[event]; + /* See if we need to transition to any other states based on this event. + * Whenever a transition happens, even to our own state, we schedule + * padding. + * + * So if a state only wants to schedule padding for an event, it specifies + * a transition to itself. All non-specified events are ignored. + */ + log_fn(LOG_INFO, LD_CIRC, + "Circpad machine %d transitioning from %s to %s", + mi->machine_index, circpad_state_to_string(mi->current_state), + circpad_state_to_string(s)); + + /* If this is not the same state, switch and init tokens, + * otherwise just reschedule padding. */ + if (mi->current_state != s) { + mi->current_state = s; + circpad_machine_setup_tokens(mi); + circpad_choose_state_length(mi); + + /* If we transition to the end state, check to see + * if this machine wants to be shut down at end */ + if (s == CIRCPAD_STATE_END) { + circpad_machine_transitioned_to_end(mi); + /* We transitioned but we don't pad in end. Also, mi + * may be freed. Returning STATE_CHANGED prevents us + * from accessing it in any callers of this function. */ + return CIRCPAD_STATE_CHANGED; + } + + /* We transitioned to a new state, schedule padding */ + circpad_machine_schedule_padding(mi); + return CIRCPAD_STATE_CHANGED; + } + + /* We transitioned back to the same state. Schedule padding, + * and inform if that causes a state transition. 
*/ + return circpad_machine_schedule_padding(mi); + } + + return CIRCPAD_STATE_UNCHANGED; +} + +/** + * Estimate the circuit RTT from the current middle hop out to the + * end of the circuit. + * + * We estimate RTT by calculating the time between "receive" and + * "send" at a middle hop. This is because we "receive" a cell + * from the origin, and then relay it towards the exit before a + * response comes back. It is that response time from the exit side + * that we want to measure, so that we can make use of it for synthetic + * response delays. + */ +static void +circpad_estimate_circ_rtt_on_received(circuit_t *circ, + circpad_machineinfo_t *mi) +{ + /* Origin circuits don't estimate RTT. They could do it easily enough, + * but they have no reason to use it in any delay calculations. */ + if (CIRCUIT_IS_ORIGIN(circ) || mi->stop_rtt_update) + return; + + /* If we already have a last received packet time, that means we + * did not get a response before this packet. The RTT estimate + * only makes sense if we do not have multiple packets on the + * wire, so stop estimating if this is the second packet + * back to back. However, for the first set of back-to-back + * packets, we can wait until the very first response comes back + * to us, to measure that RTT (for the response to optimistic + * data, for example). Hence stop_rtt_update is only checked + * in this received side function, and not in send side below. + */ + if (mi->last_received_time_usec) { + /* We also allow multiple back-to-back packets if the circuit is not + * opened, to handle var cells. + * XXX: Will this work with our var cell plans? Maybe not, + * since we're opened at the middle hop as soon as we process + * one var extend2 :/ */ + if (circ->state == CIRCUIT_STATE_OPEN) { + log_fn(LOG_INFO, LD_CIRC, + "Stopping padding RTT estimation on circuit (%"PRIu64 + ", %d) after two back to back packets. Current RTT: %d", + circ->n_chan ?
circ->n_chan->global_identifier : 0, + circ->n_circ_id, mi->rtt_estimate_usec); + mi->stop_rtt_update = 1; + } + } else { + mi->last_received_time_usec = monotime_absolute_usec(); + } +} + +/** + * Handles the "send" side of RTT calculation at middle nodes. + * + * This function calculates the RTT from the middle to the end + * of the circuit by subtracting the last received cell timestamp + * from the current time. It allows back-to-back cells until + * the circuit is opened, to allow for var cell handshakes. + * XXX: Check our var cell plans to make sure this will work. + */ +static void +circpad_estimate_circ_rtt_on_send(circuit_t *circ, + circpad_machineinfo_t *mi) +{ + /* Origin circuits don't estimate RTT. They could do it easily enough, + * but they have no reason to use it in any delay calculations. */ + if (CIRCUIT_IS_ORIGIN(circ)) + return; + + /* If last_received_time_usec is non-zero, we are waiting for a response + * from the exit side. Calculate the time delta and use it as RTT. */ + if (mi->last_received_time_usec) { + circpad_time_t rtt_time = monotime_absolute_usec() - + mi->last_received_time_usec; + + /* Reset the last RTT packet time, so we can tell if two cells + * arrive back to back */ + mi->last_received_time_usec = 0; + + /* Use INT32_MAX to ensure the addition doesn't overflow */ + if (rtt_time >= INT32_MAX) { + log_fn(LOG_WARN,LD_CIRC, + "Circuit padding RTT estimate overflowed: %"PRIu64 + " vs %"PRIu64, monotime_absolute_usec(), + mi->last_received_time_usec); + return; + } + + /* If the old RTT estimate is lower than this one, use this one, because + * the circuit is getting longer. If this estimate is somehow + * faster than the previous, then maybe that was network jitter. + * In that case, average them. 
*/ + if (mi->rtt_estimate_usec < (circpad_delay_t)rtt_time) { + mi->rtt_estimate_usec = (circpad_delay_t)rtt_time; + } else { + mi->rtt_estimate_usec += (circpad_delay_t)rtt_time; + mi->rtt_estimate_usec /= 2; + } + } else if (circ->state == CIRCUIT_STATE_OPEN) { + /* If last_received_time_usec is zero, then we have gotten two cells back + * to back. Stop estimating RTT in this case. Note that we only + * stop RTT update if the circuit is opened, to allow for RTT estimates + * of var cells during circ setup. */ + mi->stop_rtt_update = 1; + + if (!mi->rtt_estimate_usec) { + log_fn(LOG_NOTICE, LD_CIRC, + "Got two cells back to back on a circuit before estimating RTT."); + } + } +} + +/** + * A "non-padding" cell has been sent from this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we sent a cell into the network. + * For middle relay circuits, this means we sent a cell towards the + * origin. + */ +void +circpad_cell_event_nonpadding_sent(circuit_t *on_circ) +{ + /* Update global cell count */ + circpad_global_nonpadding_sent++; + + /* If there are no machines then this loop should not iterate */ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + /* First, update any RTT estimate */ + circpad_estimate_circ_rtt_on_send(on_circ, on_circ->padding_info[i]); + + /* Remove a token: this is the idea of adaptive padding, since we have an + * ideal distribution that we want our distribution to look like. */ + if (!circpad_machine_remove_token(on_circ->padding_info[i])) { + /* If removing a token did not cause a transition, check if + * non-padding sent event should */ + circpad_machine_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_NONPADDING_SENT); + } + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A "non-padding" cell has been received by this endpoint. React + * according to any padding state machines on the circuit. 
+ * + * For origin circuits, this means we read a cell from the network. + * For middle relay circuits, this means we received a cell from the + * origin. + */ +void +circpad_cell_event_nonpadding_received(circuit_t *on_circ) +{ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + /* First, update any RTT estimate */ + circpad_estimate_circ_rtt_on_received(on_circ, on_circ->padding_info[i]); + + circpad_machine_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_NONPADDING_RECV); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A padding cell has been sent from this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we sent a cell into the network. + * For middle relay circuits, this means we sent a cell towards the + * origin. + */ +void +circpad_cell_event_padding_sent(circuit_t *on_circ) +{ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + circpad_machine_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_PADDING_SENT); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * A padding cell has been received by this endpoint. React + * according to any padding state machines on the circuit. + * + * For origin circuits, this means we read a cell from the network. + * For middle relay circuits, this means we received a cell from the + * origin. + */ +void +circpad_cell_event_padding_received(circuit_t *on_circ) +{ + /* Same as padding sent, but delivers the PADDING_RECV event */ + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { + circpad_machine_transition(on_circ->padding_info[i], + CIRCPAD_EVENT_PADDING_RECV); + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * An "infinite" delay has been chosen from one of our histograms. + * + * "Infinite" delays mean don't send padding -- but they can also + * mean transition to another state depending on the state machine + * definitions. Check the rules and react accordingly. + * + * Return 1 if we decide to transition, 0 otherwise.
+ */ +circpad_decision_t +circpad_internal_event_infinity(circpad_machineinfo_t *mi) +{ + return circpad_machine_transition(mi, CIRCPAD_EVENT_INFINITY); +} + +/** + * All of the bins of our current state's histograms are empty. + * + * Check to see if this means transition to another state, and if + * not, refill the tokens. + * + * Return 1 if we decide to transition, 0 otherwise. + */ +circpad_decision_t +circpad_internal_event_bins_empty(circpad_machineinfo_t *mi) +{ + if (circpad_machine_transition(mi, CIRCPAD_EVENT_BINS_EMPTY) + == CIRCPAD_STATE_CHANGED) { + return CIRCPAD_STATE_CHANGED; + } else { + /* If we don't transition, then we refill the tokens */ + circpad_machine_setup_tokens(mi); + return CIRCPAD_STATE_UNCHANGED; + } +} + +/** + * This state has used up its cell count. Emit the event and + * see if we transition. + * + * Return 1 if we decide to transition, 0 otherwise. + */ +circpad_decision_t +circpad_internal_event_state_length_up(circpad_machineinfo_t *mi) +{ + return circpad_machine_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT); +} + +/** + * Returns true if the circuit matches the conditions. + */ +static inline bool +circpad_machine_conditions_met(origin_circuit_t *circ, + const circpad_machine_t *machine) +{ + if (!(circpad_circ_purpose_to_mask(TO_CIRCUIT(circ)->purpose) + & machine->conditions.purpose_mask)) + return 0; + + if (machine->conditions.requires_vanguards) { + const or_options_t *options = get_options(); + + /* Pinned middles are effectively vanguards */ + if (!(options->HSLayer2Nodes || options->HSLayer3Nodes)) + return 0; + } + + /* We check for any bits set in the circuit state mask so that machines + * can say any of the following through their state bitmask: + * "I want to apply to circuits with either streams or no streams"; OR + * "I only want to apply to circuits with streams"; OR + * "I only want to apply to circuits without streams".
*/ + if (!(circpad_circuit_state(circ) & machine->conditions.state_mask)) + return 0; + + if (circuit_get_cpath_opened_len(circ) < machine->conditions.min_hops) + return 0; + + return 1; +} + +/** + * Returns a minimized representation of the circuit state. + * + * The padding code only cares if the circuit is building, + * opened, used for streams, and/or still has relay early cells. + * This returns a bitmask of all state properties that apply to + * this circuit. + */ +static inline +circpad_circuit_state_t +circpad_circuit_state(origin_circuit_t *circ) +{ + circpad_circuit_state_t retmask = 0; + + if (circ->p_streams) + retmask |= CIRCPAD_CIRC_STREAMS; + else + retmask |= CIRCPAD_CIRC_NO_STREAMS; + + /* We use has_opened to prevent cannibalized circs from flapping. */ + if (circ->has_opened) + retmask |= CIRCPAD_CIRC_OPENED; + else + retmask |= CIRCPAD_CIRC_BUILDING; + + if (circ->remaining_relay_early_cells > 0) + retmask |= CIRCPAD_CIRC_HAS_RELAY_EARLY; + else + retmask |= CIRCPAD_CIRC_HAS_NO_RELAY_EARLY; + + return retmask; +} + +/** + * Convert a normal circuit purpose into a bitmask that we can + * use for determining matching circuits. + */ +static inline +circpad_purpose_mask_t +circpad_circ_purpose_to_mask(uint8_t circ_purpose) +{ + /* Treat OR circ purposes as ignored. They should not be passed here */ + if (BUG(circ_purpose <= CIRCUIT_PURPOSE_OR_MAX_)) { + return 0; + } + + /* Treat new client circuit purposes as "OMG ITS EVERYTHING". + * This also should not happen */ + if (BUG(circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1 > 32)) { + return CIRCPAD_PURPOSE_ALL; + } + + /* Convert the purpose to a bit position */ + return 1 << (circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1); +} + +/** + * Shut down any machines whose conditions no longer match + * the current circuit.
+ */ +static void +circpad_shutdown_old_machines(origin_circuit_t *on_circ) +{ + circuit_t *circ = TO_CIRCUIT(on_circ); + + FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, circ) { + if (!circpad_machine_conditions_met(on_circ, + circ->padding_machine[i])) { + // Clear machineinfo (frees timers) + circpad_circuit_machineinfo_free_idx(circ, i); + // Send padding negotiate stop + circpad_negotiate_padding(on_circ, + circ->padding_machine[i]->machine_num, + circ->padding_machine[i]->target_hopnum, + CIRCPAD_COMMAND_STOP); + } + } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; +} + +/** + * Negotiate new machines that would apply to this circuit. + * + * This function checks to see if we have any free machine indexes, + * and for each free machine index, it initializes the most recently + * added origin-side padding machine that matches the target machine + * index and circuit conditions, and negotiates it with the appropriate + * middle relay. + */ +static void +circpad_add_matching_machines(origin_circuit_t *on_circ) +{ + circuit_t *circ = TO_CIRCUIT(on_circ); + +#ifdef TOR_UNIT_TESTS + /* Tests don't have to init our padding machines */ + if (!origin_padding_machines) + return; +#endif + + /* If padding negotiation failed before, do not try again */ + if (on_circ->padding_negotiation_failed) + return; + + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + /* If there is a padding machine info, this index is occupied. + * No need to check conditions for this index. */ + if (circ->padding_info[i]) + continue; + + /* We have a free machine index. Check the origin padding + * machines in reverse order, so that more recently added + * machines take priority over older ones. */ + SMARTLIST_FOREACH_REVERSE_BEGIN(origin_padding_machines, + circpad_machine_t *, + machine) { + /* Machine definitions have a specific target machine index. + * This is so event ordering is deterministic with respect + * to which machine gets events first when there are two + * machines installed on a circuit. 
Make sure we only + * add this machine if its target machine index is free. */ + if (machine->machine_index == i && + circpad_machine_conditions_met(on_circ, machine)) { + + // We can only replace this machine if the target hopnum + // is the same, otherwise we'll get invalid data + if (circ->padding_machine[i]) { + if (circ->padding_machine[i]->target_hopnum != + machine->target_hopnum) + continue; + /* Replace it. (Don't free - is global). */ + circ->padding_machine[i] = NULL; + } + + /* Set up the machine immediately so that the slot is occupied. + * We will tear it down on error return, or if there is an error + * response from the relay. */ + circpad_setup_machine_on_circ(circ, machine); + if (circpad_negotiate_padding(on_circ, machine->machine_num, + machine->target_hopnum, + CIRCPAD_COMMAND_START) < 0) { + circpad_circuit_machineinfo_free_idx(circ, i); + circ->padding_machine[i] = NULL; + on_circ->padding_negotiation_failed = 1; + } else { + /* Success. Don't try any more machines */ + return; + } + } + } SMARTLIST_FOREACH_END(machine); + } FOR_EACH_CIRCUIT_MACHINE_END; +} + +/** + * Event that tells us we added a hop to an origin circuit. + * + * This event is used to decide if we should create a padding machine + * on a circuit. + */ +void +circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ) +{ + /* Since our padding conditions do not specify a max_hops, + * all we can do is add machines here */ + circpad_add_matching_machines(on_circ); +} + +/** + * Event that tells us that an origin circuit is now built. + * + * Shut down any machines that only applied to un-built circuits. + * Activate any new ones. + */ +void +circpad_machine_event_circ_built(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Circpad purpose changed event. + * + * Shut down any machines that don't apply to our circ purpose. + * Activate any new ones that do. 
+ */ +void +circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Event that tells us that an origin circuit is out of RELAY_EARLY + * cells. + * + * Shut down any machines that only applied to RELAY_EARLY circuits. + * Activate any new ones. + */ +void +circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Streams attached event. + * + * Called from link_apconn_to_circ() and handle_hs_exit_conn() + * + * Shut down any machines that only applied to machines without + * streams. Activate any new ones. + */ +void +circpad_machine_event_circ_has_streams(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Streams detached event. + * + * Called from circuit_detach_stream() + * + * Shut down any machines that only applied to machines without + * streams. Activate any new ones. + */ +void +circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ) +{ + circpad_shutdown_old_machines(circ); + circpad_add_matching_machines(circ); +} + +/** + * Verify that padding is coming from the expected hop. + * + * Returns true if from_hop matches the target hop from + * one of our padding machines. + * + * Returns false if we're not an origin circuit, or if from_hop + * does not match one of the padding machines. + */ +bool +circpad_padding_is_from_expected_hop(circuit_t *circ, + crypt_path_t *from_hop) +{ + crypt_path_t *target_hop = NULL; + if (!CIRCUIT_IS_ORIGIN(circ)) + return 0; + + FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) { + /* We have to check padding_machine and not padding_info/active + * machines here because padding may arrive after we shut down a + * machine. The info is gone, but the padding_machine waits + * for the padding_negotiated response to come back. 
*/ + if (!circ->padding_machine[i]) + continue; + + target_hop = circuit_get_cpath_hop(TO_ORIGIN_CIRCUIT(circ), + circ->padding_machine[i]->target_hopnum); + + if (target_hop == from_hop) + return 1; + } FOR_EACH_CIRCUIT_MACHINE_END; + + return 0; +} + +/** + * Deliver circpad events for an "unrecognized cell". + * + * Unrecognized cells are sent to relays and are forwarded + * onto the next hop of their circuits. Unrecognized cells + * are by definition not padding. We need to tell relay-side + * state machines that a non-padding cell was sent or received, + * depending on the direction, so they can update their histograms + * and decide to pad or not. + */ +void +circpad_deliver_unrecognized_cell_events(circuit_t *circ, + cell_direction_t dir) +{ + // We should never see unrecognized cells at origin. + // Our caller emits a warn when this happens. + if (CIRCUIT_IS_ORIGIN(circ)) { + return; + } + + if (dir == CELL_DIRECTION_OUT) { + /* When direction is out (away from origin), then we received non-padding + cell coming from the origin to us. */ + circpad_cell_event_nonpadding_received(circ); + } else if (dir == CELL_DIRECTION_IN) { + /* It's in and not origin, so the cell is going away from us. + * So we are relaying a non-padding cell towards the origin. */ + circpad_cell_event_nonpadding_sent(circ); + } +} + +/** + * Deliver circpad events for "recognized" relay cells. + * + * Recognized cells are destined for this hop, either client or middle. + * Check if this is a padding cell or not, and send the appropriate + * received event. + */ +void +circpad_deliver_recognized_relay_cell_events(circuit_t *circ, + uint8_t relay_command, + crypt_path_t *layer_hint) +{ + /* Padding negotiate cells are ignored by the state machines + * for simplicity.
*/ + if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE || + relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) { + return; + } + + if (relay_command == RELAY_COMMAND_DROP) { + rep_hist_padding_count_read(PADDING_TYPE_DROP); + + if (CIRCUIT_IS_ORIGIN(circ)) { + if (circpad_padding_is_from_expected_hop(circ, layer_hint)) { + circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), 0); + } else { + /* This is unexpected padding. Ignore it for now. */ + return; + } + } + + /* The cell should be recognized by now, which means that we are on the + destination, which means that we received a padding cell. We might be + the client or the Middle node, still, because leaky-pipe. */ + circpad_cell_event_padding_received(circ); + log_fn(LOG_INFO, LD_CIRC, "Got padding cell on %s circuit %u.", + CIRCUIT_IS_ORIGIN(circ) ? "origin" : "non-origin", + CIRCUIT_IS_ORIGIN(circ) ? + TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0); + } else { + /* We received a non-padding cell on the edge */ + circpad_cell_event_nonpadding_received(circ); + } +} + +/** + * Deliver circpad events for relay cells sent from us. + * + * If this is a padding cell, update our padding stats + * and deliver the event. Otherwise just deliver the event. + */ +void +circpad_deliver_sent_relay_cell_events(circuit_t *circ, + uint8_t relay_command) +{ + /* Padding negotiate cells are ignored by the state machines + * for simplicity. */ + if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE || + relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) { + return; + } + + /* RELAY_COMMAND_DROP is the multi-hop (aka circuit-level) padding cell in + * tor. (CELL_PADDING is a channel-level padding cell, which is not relayed + * or processed here) */ + if (relay_command == RELAY_COMMAND_DROP) { + /* Optimization: The event for RELAY_COMMAND_DROP is sent directly + * from circpad_send_padding_cell_for_callback(). 
This is to avoid + * putting a cell_t and a relay_header_t on the stack repeatedly + * if we decide to send a long train of padding cells back-to-back + * with 0 delay. So we do nothing here. */ + return; + } else { + /* This is a non-padding cell sent from the client or from + * this node. */ + circpad_cell_event_nonpadding_sent(circ); + } +} + +/** + * Initialize the states array for a circpad machine. + */ +void +circpad_machine_states_init(circpad_machine_t *machine, + circpad_statenum_t num_states) +{ + if (BUG(num_states > CIRCPAD_MAX_MACHINE_STATES)) { + num_states = CIRCPAD_MAX_MACHINE_STATES; + } + + machine->num_states = num_states; + machine->states = tor_malloc_zero(sizeof(circpad_state_t)*num_states); + + /* Initialize the default next state for all events to + * "ignore" -- if events aren't specified, they are ignored. */ + for (circpad_statenum_t s = 0; s < num_states; s++) { + for (int e = 0; e < CIRCPAD_NUM_EVENTS; e++) { + machine->states[s].next_state[e] = CIRCPAD_STATE_IGNORE; + } + } +} + +static void +circpad_setup_machine_on_circ(circuit_t *on_circ, + const circpad_machine_t *machine) +{ + if (CIRCUIT_IS_ORIGIN(on_circ) && !machine->is_origin_side) { + log_fn(LOG_WARN, LD_BUG, + "Can't set up non-origin machine on origin circuit!"); + return; + } + + if (!CIRCUIT_IS_ORIGIN(on_circ) && machine->is_origin_side) { + log_fn(LOG_WARN, LD_BUG, + "Can't set up origin machine on non-origin circuit!"); + return; + } + + tor_assert_nonfatal(on_circ->padding_machine[machine->machine_index] + == NULL); + tor_assert_nonfatal(on_circ->padding_info[machine->machine_index] == NULL); + + on_circ->padding_info[machine->machine_index] = + circpad_circuit_machineinfo_new(on_circ, machine->machine_index); + on_circ->padding_machine[machine->machine_index] = machine; +} + +static void +circpad_circ_client_machine_init(void) +{ + circpad_machine_t *circ_client_machine + = tor_malloc_zero(sizeof(circpad_machine_t)); + + // XXX: Better conditions for merge..
Or disable this machine in + // merge? + circ_client_machine->conditions.min_hops = 2; + circ_client_machine->conditions.state_mask = + CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED|CIRCPAD_CIRC_HAS_RELAY_EARLY; + circ_client_machine->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + + circ_client_machine->target_hopnum = 2; + circ_client_machine->is_origin_side = 1; + + /* Start, gap, burst */ + circpad_machine_states_init(circ_client_machine, 3); + + circ_client_machine->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + + /* If we are in burst state, and we send a non-padding cell, then we cancel + the timer for the next padding cell: + We dont want to send fake extends when actual extends are going on */ + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL; + + circ_client_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END; + + circ_client_machine->states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_CLOSEST; + + // FIXME: Tune this histogram + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2; + circ_client_machine->states[CIRCPAD_STATE_BURST].start_usec = 500; + circ_client_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + /* We have 5 tokens in the histogram, which means that all circuits will look + * like they have 7 hops (since we start this machine after the second hop, + * and tokens are decremented for any valid hops, and fake extends are + * used after that -- 2+5==7). 
*/ + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 5; + circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 5; + + circ_client_machine->machine_num = smartlist_len(origin_padding_machines); + smartlist_add(origin_padding_machines, circ_client_machine); +} + +static void +circpad_circ_responder_machine_init(void) +{ + circpad_machine_t *circ_responder_machine + = tor_malloc_zero(sizeof(circpad_machine_t)); + + /* Shut down the machine after we've sent enough packets */ + circ_responder_machine->should_negotiate_end = 1; + + /* The relay-side doesn't care what hopnum it is, but for consistency, + * let's match the client */ + circ_responder_machine->target_hopnum = 2; + circ_responder_machine->is_origin_side = 0; + + /* Start, gap, burst */ + circpad_machine_states_init(circ_responder_machine, 3); + + /* These are the settings of the state machine. In the future we are going to + serialize this into the consensus or the torrc */ + + /* We transition to the burst state on padding receive and on non-padding + * receive */ + circ_responder_machine->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + circ_responder_machine->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + /* Inside the burst state we _stay_ in the burst state when a non-padding + * is sent */ + circ_responder_machine->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_BURST; + + /* Inside the burst state we transition to the gap state when we receive a + * padding cell */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].
+ next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP; + + /* These describe the padding characteristics when in burst state */ + + /* use_rtt_estimate tries to estimate how long padding cells take to go from + C->M, and uses that as the base of the histogram */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1; + /* The histogram is 2 bins: an empty one, and infinity */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2; + circ_responder_machine->states[CIRCPAD_STATE_BURST].start_usec = 5000; + circ_responder_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + /* During burst state we wait forever for padding to arrive. + + We are waiting for a padding cell from the client to come in, so that we + respond, and we imitate what an extend looks like */ + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 0; + // Only infinity bin: + circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[1] = 1; + circ_responder_machine->states[CIRCPAD_STATE_BURST]. + histogram_total_tokens = 1; + + /* From the gap state, we _stay_ in the gap state, when we receive padding + * or non padding */ + circ_responder_machine->states[CIRCPAD_STATE_GAP]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP; + circ_responder_machine->states[CIRCPAD_STATE_GAP]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_GAP; + + /* And from the gap state, we go to the end, when the bins are empty or a + * non-padding cell is sent */ + circ_responder_machine->states[CIRCPAD_STATE_GAP].
+ next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_END; + + // FIXME: Tune this histogram + + /* The gap state is the delay you wait after you receive a padding cell + before you send a padding response */ + circ_responder_machine->states[CIRCPAD_STATE_GAP].use_rtt_estimate = 1; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_len = 6; + circ_responder_machine->states[CIRCPAD_STATE_GAP].start_usec = 5000; + circ_responder_machine->states[CIRCPAD_STATE_GAP].range_usec = 1000000; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[0] = 0; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[1] = 1; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[2] = 2; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[3] = 2; + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[4] = 1; + /* Total number of tokens */ + circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_total_tokens = 6; + circ_responder_machine->states[CIRCPAD_STATE_GAP].token_removal = + CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC; + + circ_responder_machine->machine_num = smartlist_len(relay_padding_machines); + smartlist_add(relay_padding_machines, circ_responder_machine); +} + +/** + * Initialize all of our padding machines. + * + * This is called at startup. It sets up some global machines, and then + * loads some from torrc, and from the tor consensus. 
+ */ +void +circpad_machines_init(void) +{ + tor_assert_nonfatal(origin_padding_machines == NULL); + tor_assert_nonfatal(relay_padding_machines == NULL); + + origin_padding_machines = smartlist_new(); + relay_padding_machines = smartlist_new(); + + // TODO: Parse machines from consensus and torrc + + circpad_circ_client_machine_init(); + circpad_circ_responder_machine_init(); +} + +/** + * Free our padding machines + */ +void +circpad_machines_free(void) +{ + if (origin_padding_machines) { + SMARTLIST_FOREACH(origin_padding_machines, + circpad_machine_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(origin_padding_machines); + } + + if (relay_padding_machines) { + SMARTLIST_FOREACH(relay_padding_machines, + circpad_machine_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(relay_padding_machines); + } +} + +/** + * Check the Protover info to see if a node supports padding. + */ +static bool +circpad_node_supports_padding(const node_t *node) +{ + if (node->rs) { + log_fn(LOG_INFO, LD_CIRC, "Checking padding: %s", + node->rs->pv.supports_padding ? "supported" : "unsupported"); + return node->rs->pv.supports_padding; + } + + log_fn(LOG_INFO, LD_CIRC, "Empty routerstatus in padding check"); + return 0; +} + +/** + * Get a node_t for the nth hop in our circuit, starting from 1. + * + * Returns node_t from the consensus for that hop, if it is opened. + * Otherwise returns NULL. + */ +static const node_t * +circuit_get_nth_node(origin_circuit_t *circ, int hop) +{ + crypt_path_t *iter = circuit_get_cpath_hop(circ, hop); + + if (!iter || iter->state != CPATH_STATE_OPEN) + return NULL; + + return node_get_by_id(iter->extend_info->identity_digest); +} + +/** + * Return true if a particular circuit supports padding + * at the desired hop. 
+ */ +static bool +circpad_circuit_supports_padding(origin_circuit_t *circ, + int target_hopnum) +{ + const node_t *hop; + + if (!(hop = circuit_get_nth_node(circ, target_hopnum))) { + return 0; + } + + return circpad_node_supports_padding(hop); +} + +/** + * Try to negotiate padding. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_negotiate_padding(origin_circuit_t *circ, + circpad_machine_num_t machine, + uint8_t target_hopnum, + uint8_t command) +{ + circpad_negotiate_t type; + cell_t cell; + ssize_t len; + + /* Check that the target hop lists support for padding in + * its ProtoVer fields */ + if (!circpad_circuit_supports_padding(circ, target_hopnum)) { + return -1; + } + + memset(&cell, 0, sizeof(cell_t)); + memset(&type, 0, sizeof(circpad_negotiate_t)); + // This gets reset to RELAY_EARLY appropriately by + // relay_send_command_from_edge_. At least, it looks that way. + // QQQ-MP-AP: Verify that. + cell.command = CELL_RELAY; + + circpad_negotiate_set_command(&type, command); + circpad_negotiate_set_version(&type, 0); + circpad_negotiate_set_machine_type(&type, machine); + + if ((len = circpad_negotiate_encode(cell.payload, CELL_PAYLOAD_SIZE, + &type)) < 0) + return -1; + + log_fn(LOG_INFO,LD_CIRC, "Negotiating padding on circuit %u", + circ->global_identifier); + + return circpad_send_command_to_hop(circ, target_hopnum, + RELAY_COMMAND_PADDING_NEGOTIATE, + cell.payload, len); +} + +/** + * Send a PADDING_NEGOTIATED response cell for the given machine and + * command back towards the origin. + * + * Returns 1 if the cell was encoded and sent successfully, 0 otherwise. + */ +bool +circpad_padding_negotiated(circuit_t *circ, + circpad_machine_num_t machine, + uint8_t command, + uint8_t response) +{ + circpad_negotiated_t type; + cell_t cell; + ssize_t len; + + memset(&cell, 0, sizeof(cell_t)); + memset(&type, 0, sizeof(circpad_negotiated_t)); + // This gets reset to RELAY_EARLY appropriately by + // relay_send_command_from_edge_. At least, it looks that way. + // QQQ-MP-AP: Verify that.
+ cell.command = CELL_RELAY; + + circpad_negotiated_set_command(&type, command); + circpad_negotiated_set_response(&type, response); + circpad_negotiated_set_version(&type, 0); + circpad_negotiated_set_machine_type(&type, machine); + + if ((len = circpad_negotiated_encode(cell.payload, CELL_PAYLOAD_SIZE, + &type)) < 0) + return 0; + + /* Use relay_send because we're from the middle to the origin. We don't + * need to specify a target hop or layer_hint. */ + return relay_send_command_from_edge(0, circ, + RELAY_COMMAND_PADDING_NEGOTIATED, + (void*)cell.payload, + (size_t)len, NULL) == 0; +} + +/** + * Parse and react to a padding_negotiate cell. + * + * This is called at the middle node upon receipt of the client's choice of + * state machine, so that it can use the requested state machine index, if + * it is available. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) +{ + int retval = 0; + circpad_negotiate_t *negotiate; + + if (CIRCUIT_IS_ORIGIN(circ)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiate cell unsupported at origin."); + return -1; + } + + if (circpad_negotiate_parse(&negotiate, cell->payload+RELAY_HEADER_SIZE, + CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) { + log_fn(LOG_WARN, LD_CIRC, + "Received malformed PADDING_NEGOTIATE cell; dropping."); + return -1; + } + + if (negotiate->command == CIRCPAD_COMMAND_STOP) { + /* Free the machine corresponding to this machine type */ + free_circ_machineinfos_with_machine_num(circ, negotiate->machine_type); + log_fn(LOG_WARN, LD_CIRC, + "Received circuit padding stop command for unknown machine."); + goto err; + } else if (negotiate->command == CIRCPAD_COMMAND_START) { + SMARTLIST_FOREACH_BEGIN(relay_padding_machines, + const circpad_machine_t *, m) { + if (m->machine_num == negotiate->machine_type) { + circpad_setup_machine_on_circ(circ, m); + goto done; + } + } SMARTLIST_FOREACH_END(m); + } + + err: + retval = -1; + + done: + 
circpad_padding_negotiated(circ, negotiate->machine_type, + negotiate->command, + (retval == 0) ? CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR); + circpad_negotiate_free(negotiate); + + return retval; +} + +/** + * Parse and react to a padding_negotiated cell. + * + * This is called at the origin upon receipt of the middle's response + * to our choice of state machine. + * + * Returns -1 on error, 0 on success. + */ +signed_error_t +circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell, + crypt_path_t *layer_hint) +{ + circpad_negotiated_t *negotiated; + + if (!CIRCUIT_IS_ORIGIN(circ)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiated cell unsupported at non-origin."); + return -1; + } + + /* Verify this came from the expected hop */ + if (!circpad_padding_is_from_expected_hop(circ, layer_hint)) { + log_fn(LOG_WARN, LD_PROTOCOL, + "Padding negotiated cell from wrong hop!"); + return -1; + } + + if (circpad_negotiated_parse(&negotiated, cell->payload+RELAY_HEADER_SIZE, + CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) { + log_fn(LOG_WARN, LD_CIRC, + "Received malformed PADDING_NEGOTIATED cell; " + "dropping."); + return -1; + } + + if (negotiated->command == CIRCPAD_COMMAND_STOP) { + /* There may not be a padding_info here if we shut down the + * machine in circpad_shutdown_old_machines(). Or, if + * circpad_add_matching_machines() added a new machine, + * there may be a padding_machine for a different machine num + * than this response. */ + free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type); + } else if (negotiated->command == CIRCPAD_COMMAND_START && + negotiated->response == CIRCPAD_RESPONSE_ERR) { + // This can happen due to consensus drift..
free the machines + // and be sad + free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type); + TO_ORIGIN_CIRCUIT(circ)->padding_negotiation_failed = 1; + log_fn(LOG_INFO, LD_CIRC, + "Middle node did not accept our padding request."); + } + + circpad_negotiated_free(negotiated); + return 0; +} + +/* Serialization */ +// TODO: Should we use keyword=value here? Are there helpers for that? +#if 0 +static void +circpad_state_serialize(const circpad_state_t *state, + smartlist_t *chunks) +{ + smartlist_add_asprintf(chunks, " %u", state->histogram[0]); + for (int i = 1; i < state->histogram_len; i++) { + smartlist_add_asprintf(chunks, ",%u", + state->histogram[i]); + } + + smartlist_add_asprintf(chunks, " 0x%x", + state->transition_cancel_events); + + for (int i = 0; i < CIRCPAD_NUM_STATES; i++) { + smartlist_add_asprintf(chunks, ",0x%x", + state->transition_events[i]); + } + + smartlist_add_asprintf(chunks, " %u %u", + state->use_rtt_estimate, + state->token_removal); +} + +char * +circpad_machine_to_string(const circpad_machine_t *machine) +{ + smartlist_t *chunks = smartlist_new(); + char *out; + (void)machine; + + circpad_state_serialize(&machine->start, chunks); + circpad_state_serialize(&machine->gap, chunks); + circpad_state_serialize(&machine->burst, chunks); + + out = smartlist_join_strings(chunks, "", 0, NULL); + + SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp)); + smartlist_free(chunks); + return out; +} + +// XXX: Writeme +const circpad_machine_t * +circpad_string_to_machine(const char *str) +{ + (void)str; + return NULL; +} + +#endif -- cgit v1.2.3-54-g00ecf From d62340018c8d363ea67ef01dc4a740e47fce2a10 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Thu, 23 Aug 2018 20:31:16 +0000 Subject: Add relay crypto mock points for tests. 
Co-authored-by: George Kadianakis --- src/core/or/relay.c | 6 +++--- src/core/or/relay.h | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 452777b2fc..9c0f3bbbe3 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -355,11 +355,11 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ, * - Encrypt it to the right layer * - Append it to the appropriate cell_queue on circ. */ -static int -circuit_package_relay_cell(cell_t *cell, circuit_t *circ, +MOCK_IMPL(int, +circuit_package_relay_cell, (cell_t *cell, circuit_t *circ, cell_direction_t cell_direction, crypt_path_t *layer_hint, streamid_t on_stream, - const char *filename, int lineno) + const char *filename, int lineno)) { channel_t *chan; /* where to send the cell */ diff --git a/src/core/or/relay.h b/src/core/or/relay.h index db7f17b96c..e84727e373 100644 --- a/src/core/or/relay.h +++ b/src/core/or/relay.h @@ -78,6 +78,11 @@ void destroy_cell_queue_append(destroy_cell_queue_t *queue, void channel_unlink_all_circuits(channel_t *chan, smartlist_t *detached_out); MOCK_DECL(int, channel_flush_from_first_active_circuit, (channel_t *chan, int max)); +MOCK_DECL(int, circuit_package_relay_cell, (cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno)); + void update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction, const char *file, int lineno); #define update_circuit_on_cmux(circ, direction) \ -- cgit v1.2.3-54-g00ecf From a336d816a68e5eaddd9d80f7179699274b367a1d Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Wed, 2 Jan 2019 15:19:12 +0200 Subject: Circuit padding tests. 
Co-authored-by: George Kadianakis --- src/core/or/circuitpadding.c | 55 +- src/core/or/circuitpadding.h | 5 + src/lib/smartlist_core/smartlist_foreach.h | 5 + src/lib/time/compat_time.c | 4 +- src/lib/time/compat_time.h | 2 +- src/test/Makefile.nmake | 1 + src/test/include.am | 1 + src/test/test.c | 1 + src/test/test.h | 1 + src/test/test_circuitpadding.c | 2358 ++++++++++++++++++++++++++++ src/test/test_containers.c | 25 + 11 files changed, 2438 insertions(+), 20 deletions(-) create mode 100644 src/test/test_circuitpadding.c diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 9d65e2cf22..6cfbf4ba56 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -157,7 +157,7 @@ circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) * invalid state. */ STATIC const circpad_state_t * -circpad_machine_current_state(circpad_machineinfo_t *mi) +circpad_machine_current_state(const circpad_machineinfo_t *mi) { const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); @@ -189,7 +189,7 @@ circpad_machine_current_state(circpad_machineinfo_t *mi) * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX). */ STATIC circpad_delay_t -circpad_histogram_bin_to_usec(circpad_machineinfo_t *mi, +circpad_histogram_bin_to_usec(const circpad_machineinfo_t *mi, circpad_hist_index_t bin) { const circpad_state_t *state = circpad_machine_current_state(mi); @@ -224,6 +224,18 @@ circpad_histogram_bin_to_usec(circpad_machineinfo_t *mi, CIRCPAD_DELAY_INFINITE); } +/** Return the midpoint of the histogram bin bin_index. */ +static circpad_delay_t +circpad_get_histogram_bin_midpoint(const circpad_machineinfo_t *mi, + int bin_index) +{ + circpad_delay_t left_bound = circpad_histogram_bin_to_usec(mi, bin_index); + circpad_delay_t right_bound = + circpad_histogram_bin_to_usec(mi, bin_index+1)-1; + + return left_bound + (right_bound - left_bound)/2; +} + /** * Return the bin that contains the usec argument. 
* "Contains" is defined as us in [lower, upper). @@ -446,6 +458,8 @@ circpad_machine_sample_delay(circpad_machineinfo_t *mi) tor_assert(curr_bin < CIRCPAD_INFINITY_BIN(state)); bin_start = circpad_histogram_bin_to_usec(mi, curr_bin); + /* We don't need to reduct 1 from the upper bound because the random range + * function below samples from [bin_start, bin_end) */ bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1); /* Truncate the high bin in case it's the infinity bin: @@ -532,7 +546,7 @@ circpad_distribution_sample(circpad_distribution_t dist) * greater than the target, and that has tokens remaining. */ static circpad_hist_index_t -circpad_machine_first_higher_index(circpad_machineinfo_t *mi, +circpad_machine_first_higher_index(const circpad_machineinfo_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, @@ -554,7 +568,7 @@ circpad_machine_first_higher_index(circpad_machineinfo_t *mi, * target_bin_usec, and that still has tokens remaining. */ static circpad_hist_index_t -circpad_machine_first_lower_index(circpad_machineinfo_t *mi, +circpad_machine_first_lower_index(const circpad_machineinfo_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, @@ -619,7 +633,9 @@ circpad_machine_remove_lower_token(circpad_machineinfo_t *mi, /** * Remove a token from the closest non-empty bin to the target. * - * If use_usec is true, measure "closest" in terms of bin start usec. + * If use_usec is true, measure "closest" in terms of the next closest bin + * midpoint. + * * If it is false, use bin index distance only. 
*/ STATIC void @@ -639,8 +655,9 @@ circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, return; } + /* Take care of edge cases first */ if (higher == mi->histogram_len && lower == -1) { - // Bins are empty + /* All bins are empty */ return; } else if (higher == mi->histogram_len) { /* All higher bins are empty */ @@ -654,6 +671,7 @@ circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, return; } + /* Now handle the intermediate cases */ if (use_usec) { /* Find the closest bin midpoint to the target */ circpad_delay_t lower_usec = circpad_get_histogram_bin_midpoint(mi, lower); @@ -677,6 +695,9 @@ circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, ENSURE_BIN_CAPACITY(lower); bin_to_remove = lower; } + mi->histogram[bin_to_remove]--; + log_debug(LD_GENERAL, "Removing token from bin %d", bin_to_remove); + return; } else { if (current - lower > higher - current) { // Higher bin is closer @@ -756,7 +777,7 @@ check_machine_token_supply(circpad_machineinfo_t *mi) * * Returns 1 if we transition states, 0 otherwise. */ -circpad_decision_t +STATIC circpad_decision_t circpad_machine_remove_token(circpad_machineinfo_t *mi) { const circpad_state_t *state = NULL; @@ -841,10 +862,10 @@ circpad_machine_remove_token(circpad_machineinfo_t *mi) * * Returns negative on error, 0 on success. */ -static signed_error_t -circpad_send_command_to_hop(origin_circuit_t *circ, uint8_t hopnum, - uint8_t relay_command, const uint8_t *payload, - ssize_t payload_len) +MOCK_IMPL(STATIC signed_error_t, +circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len)) { crypt_path_t *target_hop = circuit_get_cpath_hop(circ, hopnum); signed_error_t ret; @@ -1017,7 +1038,7 @@ circpad_new_consensus_params(const networkstatus_t *ns) * * Returns 1 if limits are set and we've hit them. Otherwise returns 0. 
*/ -static bool +STATIC bool circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) { const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); @@ -1025,7 +1046,7 @@ circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) /* If machine_padding_pct is non-zero, and we've sent more * than the allowed count of padding cells, then check our * percent limits for this machine. */ - if (machine->max_padding_percent && + if (machine->max_padding_percent && mi->padding_sent >= machine->allowed_padding_count) { uint32_t total_cells = mi->padding_sent + mi->nonpadding_sent; @@ -1046,7 +1067,7 @@ circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) /* Check the percent */ if ((100*circpad_global_padding_sent) / total_cells > - circpad_global_max_padding_percent) { + circpad_global_max_padding_percent) { return 1; // global limit reached. Stop. } } @@ -1205,9 +1226,9 @@ circpad_machine_transitioned_to_end(circpad_machineinfo_t *mi) * * Returns 1 if we transition states, 0 otherwise. 
*/ -circpad_decision_t -circpad_machine_transition(circpad_machineinfo_t *mi, - circpad_event_t event) +MOCK_IMPL(circpad_decision_t, +circpad_machine_transition,(circpad_machineinfo_t *mi, + circpad_event_t event)) { const circpad_state_t *state = circpad_machine_current_state(mi); diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h index 24034a4548..4680c6be43 100644 --- a/src/core/or/circuitpadding.h +++ b/src/core/or/circuitpadding.h @@ -675,6 +675,11 @@ STATIC void circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, bool use_usec); STATIC void circpad_machine_setup_tokens(circpad_machineinfo_t *mi); +MOCK_DECL(STATIC signed_error_t, +circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len)); + #ifdef TOR_UNIT_TESTS extern smartlist_t *origin_padding_machines; extern smartlist_t *relay_padding_machines; diff --git a/src/lib/smartlist_core/smartlist_foreach.h b/src/lib/smartlist_core/smartlist_foreach.h index 14f2930c9f..c9afebd6a2 100644 --- a/src/lib/smartlist_core/smartlist_foreach.h +++ b/src/lib/smartlist_core/smartlist_foreach.h @@ -83,6 +83,11 @@ ++var ## _sl_idx) { \ var = (sl)->list[var ## _sl_idx]; +/** Iterates over the items in smartlist sl in reverse order, similar to + * SMARTLIST_FOREACH_BEGIN + * + * NOTE: This macro is incompatible with SMARTLIST_DEL_CURRENT. 
+ */ #define SMARTLIST_FOREACH_REVERSE_BEGIN(sl, type, var) \ STMT_BEGIN \ int var ## _sl_idx, var ## _sl_len=(sl)->num_used; \ diff --git a/src/lib/time/compat_time.c b/src/lib/time/compat_time.c index f1ddb4fdc4..387b0fad22 100644 --- a/src/lib/time/compat_time.c +++ b/src/lib/time/compat_time.c @@ -787,8 +787,8 @@ monotime_absolute_nsec(void) return monotime_diff_nsec(&initialized_at, &now); } -uint64_t -monotime_absolute_usec(void) +MOCK_IMPL(uint64_t, +monotime_absolute_usec,(void)) { return monotime_absolute_nsec() / 1000; } diff --git a/src/lib/time/compat_time.h b/src/lib/time/compat_time.h index 44fab62de5..bf1bd28801 100644 --- a/src/lib/time/compat_time.h +++ b/src/lib/time/compat_time.h @@ -103,7 +103,7 @@ uint64_t monotime_absolute_nsec(void); /** * Return the number of microseconds since the timer system was initialized. */ -uint64_t monotime_absolute_usec(void); +MOCK_DECL(uint64_t, monotime_absolute_usec,(void)); /** * Return the number of milliseconds since the timer system was initialized. 
*/ diff --git a/src/test/Makefile.nmake b/src/test/Makefile.nmake index aa16a22b52..ca6a84cf8a 100644 --- a/src/test/Makefile.nmake +++ b/src/test/Makefile.nmake @@ -19,6 +19,7 @@ TEST_OBJECTS = test.obj test_addr.obj test_channel.obj test_channeltls.obj \ test_cell_formats.obj test_relay.obj test_replay.obj \ test_channelpadding.obj \ test_circuitstats.obj \ + test_circuitpadding.obj \ test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj tinytest.obj: ..\ext\tinytest.c diff --git a/src/test/include.am b/src/test/include.am index 4725e8cbaa..4da0b84392 100644 --- a/src/test/include.am +++ b/src/test/include.am @@ -101,6 +101,7 @@ src_test_test_SOURCES += \ src/test/test_cell_queue.c \ src/test/test_channel.c \ src/test/test_channelpadding.c \ + src/test/test_circuitpadding.c \ src/test/test_channeltls.c \ src/test/test_checkdir.c \ src/test/test_circuitlist.c \ diff --git a/src/test/test.c b/src/test/test.c index 13e8c71709..a0a138b03d 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -845,6 +845,7 @@ struct testgroup_t testgroups[] = { { "channeltls/", channeltls_tests }, { "checkdir/", checkdir_tests }, { "circuitbuild/", circuitbuild_tests }, + { "circuitpadding/", circuitpadding_tests }, { "circuitlist/", circuitlist_tests }, { "circuitmux/", circuitmux_tests }, { "circuitstats/", circuitstats_tests }, diff --git a/src/test/test.h b/src/test/test.h index 9f754469c8..9f6eb0a7e6 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -187,6 +187,7 @@ extern struct testcase_t cell_format_tests[]; extern struct testcase_t cell_queue_tests[]; extern struct testcase_t channel_tests[]; extern struct testcase_t channelpadding_tests[]; +extern struct testcase_t circuitpadding_tests[]; extern struct testcase_t channeltls_tests[]; extern struct testcase_t checkdir_tests[]; extern struct testcase_t circuitbuild_tests[]; diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c new file mode 100644 index 0000000000..78f93f7b24 --- /dev/null 
+++ b/src/test/test_circuitpadding.c @@ -0,0 +1,2358 @@ +#define TOR_CHANNEL_INTERNAL_ +#define TOR_TIMERS_PRIVATE +#define CIRCUITPADDING_PRIVATE +#define NETWORKSTATUS_PRIVATE + +#include "core/or/or.h" +#include "test.h" +#include "lib/testsupport/testsupport.h" +#include "core/or/connection_or.h" +#include "core/or/channel.h" +#include "core/or/channeltls.h" +#include +#include "lib/evloop/compat_libevent.h" +#include "lib/time/compat_time.h" +#include "core/or/relay.h" +#include "core/or/circuitlist.h" +#include "core/or/circuitbuild.h" +#include "core/or/circuitpadding.h" +#include "core/crypto/relay_crypto.h" +#include "core/or/protover.h" +#include "feature/nodelist/nodelist.h" +#include "lib/evloop/compat_libevent.h" +#include "app/config/config.h" + +#include "feature/nodelist/routerstatus_st.h" +#include "feature/nodelist/networkstatus_st.h" +#include "feature/nodelist/node_st.h" +#include "core/or/cell_st.h" +#include "core/or/crypt_path_st.h" +#include "core/or/or_circuit_st.h" +#include "core/or/origin_circuit_st.h" + +extern smartlist_t *connection_array; + +#define USEC_PER_SEC (1000000) +#define NSEC_PER_USEC (1000) +#define NSEC_PER_MSEC (1000*1000) + +circid_t get_unique_circ_id_by_chan(channel_t *chan); +void helper_create_basic_machine(void); +static void helper_create_conditional_machines(void); + +static or_circuit_t * new_fake_orcirc(channel_t *nchan, channel_t *pchan); +channel_t *new_fake_channel(void); +void test_circuitpadding_negotiation(void *arg); +void test_circuitpadding_wronghop(void *arg); +void test_circuitpadding_conditions(void *arg); + +void test_circuitpadding_serialize(void *arg); +void test_circuitpadding_rtt(void *arg); +void test_circuitpadding_tokens(void *arg); +void test_circuitpadding_circuitsetup_machine(void *arg); + +static void +simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, + int padding); +void free_fake_orcirc(circuit_t *circ); +void free_fake_origin_circuit(origin_circuit_t *circ); + 
+static int deliver_negotiated = 1; +static int64_t curr_mocked_time; + +static node_t padding_node; +static node_t non_padding_node; + +static channel_t dummy_channel; +static circpad_machine_t circ_client_machine; + +static void +timers_advance_and_run(int64_t msec_update) +{ + curr_mocked_time += msec_update*NSEC_PER_MSEC; + monotime_coarse_set_mock_time_nsec(curr_mocked_time); + monotime_set_mock_time_nsec(curr_mocked_time); + timers_run_pending(); +} + +static void +nodes_init(void) +{ + padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t)); + padding_node.rs->pv.supports_padding = 1; + + non_padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t)); + non_padding_node.rs->pv.supports_padding = 0; +} + +static void +nodes_free(void) +{ + tor_free(padding_node.rs); + + tor_free(non_padding_node.rs); +} + +static const node_t * +node_get_by_id_mock(const char *identity_digest) +{ + if (identity_digest[0] == 1) { + return &padding_node; + } else if (identity_digest[0] == 0) { + return &non_padding_node; + } + + return NULL; +} + +static or_circuit_t * +new_fake_orcirc(channel_t *nchan, channel_t *pchan) +{ + or_circuit_t *orcirc = NULL; + circuit_t *circ = NULL; + crypt_path_t tmp_cpath; + char whatevs_key[CPATH_KEY_MATERIAL_LEN]; + + orcirc = tor_malloc_zero(sizeof(*orcirc)); + circ = &(orcirc->base_); + circ->magic = OR_CIRCUIT_MAGIC; + + //circ->n_chan = nchan; + circ->n_circ_id = get_unique_circ_id_by_chan(nchan); + circ->n_mux = NULL; /* ?? 
*/ + cell_queue_init(&(circ->n_chan_cells)); + circ->n_hop = NULL; + circ->streams_blocked_on_n_chan = 0; + circ->streams_blocked_on_p_chan = 0; + circ->n_delete_pending = 0; + circ->p_delete_pending = 0; + circ->received_destroy = 0; + circ->state = CIRCUIT_STATE_OPEN; + circ->purpose = CIRCUIT_PURPOSE_OR; + circ->package_window = CIRCWINDOW_START_MAX; + circ->deliver_window = CIRCWINDOW_START_MAX; + circ->n_chan_create_cell = NULL; + + //orcirc->p_chan = pchan; + orcirc->p_circ_id = get_unique_circ_id_by_chan(pchan); + cell_queue_init(&(orcirc->p_chan_cells)); + + circuit_set_p_circid_chan(orcirc, orcirc->p_circ_id, pchan); + circuit_set_n_circid_chan(circ, circ->n_circ_id, nchan); + + memset(&tmp_cpath, 0, sizeof(tmp_cpath)); + if (circuit_init_cpath_crypto(&tmp_cpath, whatevs_key, + sizeof(whatevs_key), 0, 0)<0) { + log_warn(LD_BUG,"Circuit initialization failed"); + return NULL; + } + orcirc->crypto = tmp_cpath.crypto; + + return orcirc; +} + +void +free_fake_orcirc(circuit_t *circ) +{ + or_circuit_t *orcirc = TO_OR_CIRCUIT(circ); + + relay_crypto_clear(&orcirc->crypto); + + circpad_circuit_free_all_machineinfos(circ); + tor_free(circ); +} + +void +free_fake_origin_circuit(origin_circuit_t *circ) +{ + circpad_circuit_free_all_machineinfos(TO_CIRCUIT(circ)); + circuit_clear_cpath(circ); + tor_free(circ); +} + +void dummy_nop_timer(void); + +//static int dont_stop_libevent = 0; + +static circuit_t *client_side; +static circuit_t *relay_side; + +static int n_client_cells = 0; +static int n_relay_cells = 0; + +static int +circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno); + +static void +circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ, + cell_direction_t direction); + +static void +circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ, + cell_direction_t direction) +{ + (void)cmux; + (void)circ; + 
(void)direction; + + return; +} + +static int +circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ, + cell_direction_t cell_direction, + crypt_path_t *layer_hint, streamid_t on_stream, + const char *filename, int lineno) +{ + (void)cell; (void)on_stream; (void)filename; (void)lineno; + + if (circ == client_side) { + if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) { + // Deliver to relay + circpad_handle_padding_negotiate(relay_side, cell); + } else { + + int is_target_hop = circpad_padding_is_from_expected_hop(circ, + layer_hint); + tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_OUT); + tt_int_op(is_target_hop, OP_EQ, 1); + + // No need to pretend a padding cell was sent: This event is + // now emitted internally when the circuitpadding code sends them. + //circpad_cell_event_padding_sent(client_side); + + // Receive padding cell at middle + circpad_deliver_recognized_relay_cell_events(relay_side, + cell->payload[0], NULL); + } + n_client_cells++; + } else if (circ == relay_side) { + tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_IN); + + if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATED) { + // XXX: blah need right layer_hint.. + if (deliver_negotiated) + circpad_handle_padding_negotiated(client_side, cell, + TO_ORIGIN_CIRCUIT(client_side) + ->cpath->next); + } else if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) { + circpad_handle_padding_negotiate(client_side, cell); + } else { + // No need to pretend a padding cell was sent: This event is + // now emitted internally when the circuitpadding code sends them. 
+ //circpad_cell_event_padding_sent(relay_side); + + // Receive padding cell at client + circpad_deliver_recognized_relay_cell_events(client_side, + cell->payload[0], + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + } + + n_relay_cells++; + } + + done: + timers_advance_and_run(1); + return 0; +} + +// Test reading and writing padding to strings (or options_t + consensus) +void +test_circuitpadding_serialize(void *arg) +{ + (void)arg; +} + +static signed_error_t +circpad_send_command_to_hop_mock(origin_circuit_t *circ, uint8_t hopnum, + uint8_t relay_command, const uint8_t *payload, + ssize_t payload_len) +{ + (void) circ; + (void) hopnum; + (void) relay_command; + (void) payload; + (void) payload_len; + return 0; +} + +void +test_circuitpadding_rtt(void *arg) +{ + /* Test Plan: + * + * 1. Test RTT measurement server side + * a. test usage of measured RTT + * 2. Test termination of RTT measurement + * a. test non-update of RTT + * 3. Test client side circuit and non-application of RTT.. 
+ */ + circpad_delay_t rtt_estimate; + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock); + + dummy_channel.cmux = circuitmux_alloc(); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); + curr_mocked_time = 1*NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + helper_create_basic_machine(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side, + 0); + + relay_side->padding_machine[0] = &circ_client_machine; + relay_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,0); + + /* Test 1: Test measuring RTT */ + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0); + + timers_advance_and_run(20); + + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 19000); + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 30000); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + 
tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0); + timers_advance_and_run(20); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 20000); + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 21000); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + /* Test 2: Termination of RTT measurement (from the previous test) */ + tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1); + rtt_estimate = relay_side->padding_info[0]->rtt_estimate_usec; + + circpad_cell_event_nonpadding_received((circuit_t*)relay_side); + timers_advance_and_run(4); + circpad_cell_event_nonpadding_sent((circuit_t*)relay_side); + + tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_EQ, + rtt_estimate); + tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1); + tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0), + OP_EQ, + relay_side->padding_info[0]->rtt_estimate_usec+ + circpad_machine_current_state( + relay_side->padding_info[0])->start_usec); + + /* Test 3: Make sure client side machine properly ignores RTT */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + timers_advance_and_run(20); + circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0); + + tt_int_op(client_side->padding_info[0]->rtt_estimate_usec, OP_EQ, 0); + 
tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0), + OP_NE, client_side->padding_info[0]->rtt_estimate_usec); + tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0), + OP_EQ, + circpad_machine_current_state( + client_side->padding_info[0])->start_usec); + done: + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + timers_shutdown(); + monotime_disable_test_mocking(); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + tor_free(circ_client_machine.states); + + return; +} + +void +helper_create_basic_machine(void) +{ + /* Start, burst */ + circpad_machine_states_init(&circ_client_machine, 2); + + circ_client_machine.states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + circ_client_machine.states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL; + + // FIXME: Is this what we want? 
+ circ_client_machine.states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_HIGHER; + + // FIXME: Tune this histogram + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_len = 5; + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 500; + circ_client_machine.states[CIRCPAD_STATE_BURST].range_usec = 1000000; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[0] = 1; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[1] = 0; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[2] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[3] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[4] = 2; + circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_total_tokens = 7; + circ_client_machine.states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1; + + return; +} + +#define BIG_HISTOGRAM_LEN 10 + +/** Setup a machine with a big histogram */ +static void +helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy) +{ + const int tokens_per_bin = 2; + + /* Start, burst */ + circpad_machine_states_init(&circ_client_machine, 2); + + circpad_state_t *burst_state = + &circ_client_machine.states[CIRCPAD_STATE_BURST]; + + circ_client_machine.states[CIRCPAD_STATE_START]. 
+ next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST; + + burst_state->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST; + burst_state->next_state[CIRCPAD_EVENT_NONPADDING_RECV] =CIRCPAD_STATE_BURST; + + burst_state->next_state[CIRCPAD_EVENT_NONPADDING_SENT] =CIRCPAD_STATE_CANCEL; + + burst_state->token_removal = CIRCPAD_TOKEN_REMOVAL_HIGHER; + + burst_state->histogram_len = BIG_HISTOGRAM_LEN; + burst_state->start_usec = 0; + burst_state->range_usec = 1000; + + int n_tokens = 0; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + burst_state->histogram[i] = tokens_per_bin; + n_tokens += tokens_per_bin; + } + + burst_state->histogram_total_tokens = n_tokens; + burst_state->length_dist.type = CIRCPAD_DIST_UNIFORM; + burst_state->length_dist.param1 = n_tokens; + burst_state->length_dist.param2 = n_tokens; + burst_state->max_length = n_tokens; + burst_state->length_includes_nonpadding = 1; + burst_state->use_rtt_estimate = 0; + burst_state->token_removal = removal_strategy; +} + +static circpad_decision_t +circpad_machine_schedule_padding_mock(circpad_machineinfo_t *mi) +{ + (void)mi; + return 0; +} + +static uint64_t +mock_monotime_absolute_usec(void) +{ + return 100; +} + +/** Test higher token removal strategy by bin */ +static void +test_circuitpadding_token_removal_higher(void *arg) +{ + circpad_machineinfo_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) 
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_HIGHER); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec lies between its left + * bound (31) and the 5th bin's (62). Once a bin is emptied, the next tokens + * come from the next higher bin: the 5th, then the 6th, etc.
*/ + const int bin_removal_order[] = {4, 5, 6, 7, 8}; + unsigned i; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all lower bins are not touched */ + for (i=0; i < 4 ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 1; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[0], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test lower token removal strategy by bin */ +static void +test_circuitpadding_token_removal_lower(void *arg) +{ + circpad_machineinfo_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.)
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_LOWER); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec lies between its left + * bound (31) and the 5th bin's (62). Once a bin is emptied, the next tokens + * come from the next lower bin: the 3rd, then the 2nd, etc.
*/ + const int bin_removal_order[] = {4, 3, 2, 1, 0}; + unsigned i; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all higher bins are untouched */ + for (i = 5; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test closest token removal strategy by bin */ +static void +test_circuitpadding_closest_token_removal(void *arg) +{ + circpad_machineinfo_t *mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) 
*/ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec is nearest to the 4th + * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for + * the same reason, then from the 5th, etc. 
*/ + const int bin_removal_order[] = {4, 3, 5, 2, 6, 1, 7, 0, 8, 9}; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all bins have been refilled */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 102; + mi->histogram[0] = 0; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[1], OP_EQ, 1); + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test closest token removal strategy with usec */ +static void +test_circuitpadding_closest_token_removal_usec(void *arg) +{ + circpad_machineinfo_t 
*mi; + (void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) */ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /*************************************************************************/ + + uint64_t current_time = monotime_absolute_usec(); + + /* Test left boundaries of each histogram bin: */ + const circpad_delay_t bin_left_bounds[] = + {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE}; + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_uint_op(bin_left_bounds[i], OP_EQ, + circpad_histogram_bin_to_usec(mi, i)); + } + + /* XXX we want to test remove_token_exact and + circpad_machine_remove_closest_token() with usec */ + + /* Check that all bins have two tokens right now */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* This is the right order to remove tokens from this histogram. That is, we + * first remove tokens from the 4th bin since 57 usec is nearest to the 4th + * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for + * the same reason, then from the 5th, etc. 
*/ + const int bin_removal_order[] = {4, 3, 5, 2, 1, 0, 6, 7, 8, 9}; + + /* Remove all tokens from all bins apart from the infinity bin */ + for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) { + int bin_to_remove = bin_removal_order[i]; + log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin", + i, bin_to_remove); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1); + + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + + /* Test that we cleaned out this bin. Don't do this in the case of the last + bin since the tokens will get refilled */ + if (i != BIG_HISTOGRAM_LEN - 2) { + tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0); + } + } + + /* Check that all bins have been refilled */ + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + + /* Test below the lowest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 102; + mi->histogram[0] = 0; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[1], OP_EQ, 1); + + /* Test above the highest bin, for coverage */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100; + mi->padding_scheduled_at_usec = current_time - 29202; + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Test exact token removal strategy */ +static void +test_circuitpadding_token_removal_exact(void *arg) +{ + circpad_machineinfo_t *mi; +
(void)arg; + + /* Mock it up */ + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock); + + /* Setup test environment (time etc.) */ + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + monotime_enable_test_mocking(); + + /* Create test machine */ + helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_EXACT); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + + /* move the machine to the right state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + /* Get the machine and setup tokens */ + mi = client_side->padding_info[0]; + tt_assert(mi); + + /**********************************************************************/ + uint64_t current_time = monotime_absolute_usec(); + + /* Ensure that we will clear out bin #4 with this usec */ + mi->padding_scheduled_at_usec = current_time - 57; + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_token(mi); + mi->padding_scheduled_at_usec = current_time - 57; + tt_int_op(mi->histogram[4], OP_EQ, 1); + circpad_machine_remove_token(mi); + tt_int_op(mi->histogram[4], OP_EQ, 0); + + /* Ensure that we will not remove any other tokens even tho we try to, since + * this is what the exact strategy dictates */ + mi->padding_scheduled_at_usec = current_time - 57; + circpad_machine_remove_token(mi); + for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) { + if (i != 4) { + tt_int_op(mi->histogram[i], OP_EQ, 2); + } + } + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +#undef BIG_HISTOGRAM_LEN + +void +test_circuitpadding_tokens(void *arg) +{ + const circpad_state_t *state; + 
circpad_machineinfo_t *mi; + (void)arg; + + /** Test plan: + * + * 1. Test symmetry between bin_to_usec and usec_to_bin + * a. Test conversion + * b. Test edge transitions (lower, upper) + * 2. Test remove higher on an empty bin + * a. Normal bin + * b. Infinity bin + * c. Bin 0 + * d. No higher + * 3. Test remove lower + * a. Normal bin + * b. Bin 0 + * c. No lower + * 4. Test remove closest + * a. Closest lower + * b. Closest higher + * c. Closest 0 + * d. Closest Infinity + * 5. Test remove closest usec + * a. Closest 0 + * b. Closest lower (below midpoint) + * c. Closest higher (above midpoint) + * d. Closest Infinity + */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); + curr_mocked_time = 1*NSEC_PER_USEC; + + timers_initialize(); + + helper_create_basic_machine(); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side, + 0); + + mi = client_side->padding_info[0]; + + // Pretend a non-padding cell was sent + // XXX: This messes us up.. Padding gets scheduled..
+ circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + /* We have to save the infinity bin because one inf delay + * could have been chosen when we transition to burst */ + circpad_hist_token_t inf_bin = mi->histogram[4]; + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + state = circpad_machine_current_state(client_side->padding_info[0]); + + // Test 0: convert bin->usec->bin + // Bin 0+1 have different semantics + for (int bin = 0; bin < 2; bin++) { + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + } + for (int bin = 2; bin < state->histogram_len-1; bin++) { + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + /* Verify we round down */ + bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec+3); + tt_int_op(bin, OP_EQ, bin2); + + /* One usec less than the converted value must map to the previous bin */ + bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec-1); + tt_int_op(bin, OP_EQ, bin2+1); + } + + // Test 1: converting usec->bin->usec->bin + // Bin 0+1 have different semantics.
+ for (circpad_delay_t i = 0; i <= state->start_usec+1; i++) { + int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0], + i); + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + tt_int_op(i, OP_LE, usec); + } + for (circpad_delay_t i = state->start_usec+1; + i <= state->start_usec + state->range_usec; i++) { + int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0], + i); + circpad_delay_t usec = + circpad_histogram_bin_to_usec(client_side->padding_info[0], bin); + int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0], + usec); + tt_int_op(bin, OP_EQ, bin2); + tt_int_op(i, OP_GE, usec); + } + + /* 2.a. Normal higher bin */ + { + tt_int_op(mi->histogram[2], OP_EQ, 2); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 2); + tt_int_op(mi->histogram[2], OP_EQ, 1); + + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + + /* Bin 2 is now empty, so the next two removals must come out of bin 3 */ + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + /* Bins 2 and 3 are both empty: one more removal must leave bin 3 at 0 */ + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + } + + /* 2.b.
Higher Infinity bin */ + { + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1); + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + + /* Test past the infinity bin */ + circpad_machine_remove_higher_token(mi, + circpad_histogram_bin_to_usec(mi, 5)+1000000); + + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + } + + /* 2.c. Bin 0 */ + { + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_higher_token(mi, + state->start_usec/2); + tt_int_op(mi->histogram[0], OP_EQ, 0); + } + + /* Drain the infinity bin and cause a refill */ + while (inf_bin != 0) { + tt_int_op(mi->histogram[4], OP_EQ, inf_bin); + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + inf_bin--; + } + + circpad_cell_event_nonpadding_sent((circuit_t*)client_side); + + // We should have refilled here. + tt_int_op(mi->histogram[4], OP_EQ, 2); + + /* 3.a. Bin 0 + * NOTE(review): this block repeats 2.c with remove_higher_token although + * section 3 is about remove_lower -- confirm which helper was intended. */ + { + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_higher_token(mi, + state->start_usec/2); + tt_int_op(mi->histogram[0], OP_EQ, 0); + } + + /* 3.b. Test remove lower normal bin */ + { + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + /* 3.c. No lower */ + circpad_machine_remove_lower_token(mi, + circpad_histogram_bin_to_usec(mi, 3)+1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + } + + /* 4. Test remove closest + * a. Closest lower + * b. Closest higher + * c. Closest 0 + * d.
Closest Infinity + */ + circpad_machine_setup_tokens(mi); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[2], OP_EQ, 0); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[0], OP_EQ, 0); + /* With bins 0, 2 and 3 drained, the infinity bin (4) must stay at 2 */ + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 2)+1, 0); + tt_int_op(mi->histogram[4], OP_EQ, 2); + + /* 5. Test remove closest usec + * a. Closest 0 + * b. Closest lower (below midpoint) + * c. Closest higher (above midpoint) + * d.
Closest Infinity + */ + circpad_machine_setup_tokens(mi); + + tt_int_op(mi->histogram[0], OP_EQ, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + tt_int_op(mi->histogram[0], OP_EQ, 0); + tt_int_op(mi->histogram[2], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 0)/3, 1); + tt_int_op(mi->histogram[2], OP_EQ, 0); + tt_int_op(mi->histogram[3], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + tt_int_op(mi->histogram[3], OP_EQ, 0); + tt_int_op(mi->histogram[4], OP_EQ, 2); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + circpad_machine_remove_closest_token(mi, + circpad_histogram_bin_to_usec(mi, 4), 1); + tt_int_op(mi->histogram[4], OP_EQ, 2); + + // XXX: Need more coverage of the actual usec branches + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + monotime_disable_test_mocking(); + tor_free(circ_client_machine.states); +} + +/** Check that padding and negotiation cells that arrive from an unexpected + * hop, at the wrong endpoint, or with garbled contents are rejected. */ +void +test_circuitpadding_wronghop(void *arg) +{ + /** + * Test plan: + * 1. Padding sent from hop 1 and 3 to client + * 2. Send negotiated from hop 1 and 3 to client + * 3. Garbled negotiated cell + * 4. Padding negotiate sent to client + * 5. Send negotiate stop command for unknown machine + * 6. Send negotiated to relay + * 7.
Garbled padding negotiate cell + */ + (void)arg; + uint32_t read_bw = 0, overhead_bw = 0; + cell_t cell; + signed_error_t ret; + origin_circuit_t *orig_client; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + client_side = (circuit_t *)origin_circuit_new(); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + orig_client = TO_ORIGIN_CIRCUIT(client_side); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(node_get_by_id, + node_get_by_id_mock); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + /* Build three hops */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + /* verify echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* Baseline byte counters; compared again after each delivery below */ + read_bw = orig_client->n_delivered_read_circ_bw; + overhead_bw = orig_client->n_overhead_read_circ_bw; + + /* 1.
Test padding from first and third hop */ + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + /* Padding from the second hop (cpath->next) is the only delivery that + * must raise the overhead counter */ + circpad_deliver_recognized_relay_cell_events(client_side, + RELAY_COMMAND_DROP, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_LT, + orig_client->n_overhead_read_circ_bw); + + /* 2. Test padding negotiated not handled from hops 1,3 + * NOTE(review): cell has not been written to at this point (the first + * memset is in step 3); presumably the handler rejects the hop before it + * reads the cell -- confirm. */ + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath); + tt_int_op(ret, OP_EQ, -1); + + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next); + tt_int_op(ret, OP_EQ, -1); + + /* 3. Garbled negotiated cell */ + memset(&cell, 255, sizeof(cell)); + ret = circpad_handle_padding_negotiated(client_side, &cell, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + tt_int_op(ret, OP_EQ, -1); + + /* 4. Test that negotiate is dropped at origin */ + read_bw = orig_client->n_delivered_read_circ_bw; + overhead_bw = orig_client->n_overhead_read_circ_bw; + relay_send_command_from_edge(0, relay_side, + RELAY_COMMAND_PADDING_NEGOTIATE, + (void*)cell.payload, + (size_t)3, NULL); + tt_int_op(read_bw, OP_EQ, + orig_client->n_delivered_read_circ_bw); + tt_int_op(overhead_bw, OP_EQ, + orig_client->n_overhead_read_circ_bw); + + tt_int_op(n_relay_cells, OP_EQ, 2); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* 5.
Test that asking to stop the wrong machine does nothing */ + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side), + 255, 2, CIRCPAD_COMMAND_STOP); + tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(client_side->padding_info[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* 6. Sending negotiated command to relay does nothing */ + ret = circpad_handle_padding_negotiated(relay_side, &cell, NULL); + tt_int_op(ret, OP_EQ, -1); + + /* 7. Test garbled negotiate cell (bad command 255) + * NOTE(review): the cell is zero-filled below, so the "255" above may be + * stale -- confirm what makes this cell invalid. */ + memset(&cell, 0, sizeof(cell)); + ret = circpad_handle_padding_negotiate(relay_side, &cell); + tt_int_op(ret, OP_EQ, -1); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* Test 2: Test no padding */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = (circuit_t *)origin_circuit_new(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 0); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* verify no echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 3); + tt_int_op(n_client_cells, OP_EQ, 2); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Spoof padding negotiated on circuit with no padding */ + circpad_padding_negotiated(relay_side, + CIRCPAD_MACHINE_CIRC_SETUP, + CIRCPAD_COMMAND_START, + CIRCPAD_RESPONSE_OK); + + /* verify no padding was negotiated */ +
tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + /* Same spoof again, this time carrying an error response */ + circpad_padding_negotiated(relay_side, + CIRCPAD_MACHINE_CIRC_SETUP, + CIRCPAD_COMMAND_START, + CIRCPAD_RESPONSE_ERR); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + monotime_disable_test_mocking(); + UNMOCK(node_get_by_id); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + nodes_free(); +} + +/** Test padding negotiation between the client and the middle hop: padding + * supported, padding unsupported, and a relay whose machine list no longer + * matches the client's. */ +void +test_circuitpadding_negotiation(void *arg) +{ + /** + * Test plan: + * 1. Test circuit where padding is supported by middle + * a. Make sure padding negotiation is sent + * b. Test padding negotiation delivery and parsing + * 2. Test circuit where padding is unsupported by middle + * a. Make sure padding negotiation is not sent + * 3. Test failure to negotiate a machine due to desync.
+ */ + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + client_side = TO_CIRCUIT(origin_circuit_new()); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); + curr_mocked_time = 1*NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(node_get_by_id, + node_get_by_id_mock); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + + /* Build two hops */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + /* verify echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Test 2: Test no padding */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + /* The second hop is added with padding == 0, i.e. marked as not + * supporting padding */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 0); + + /* verify no padding was negotiated */ + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* verify no echo was sent */ + tt_int_op(n_relay_cells, OP_EQ,
1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* Finish circuit */ + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Force negotiate padding. */ + circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side), + CIRCPAD_MACHINE_CIRC_SETUP, + 2, CIRCPAD_COMMAND_START); + + /* verify no padding was negotiated */ + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + /* verify no echo was sent */ + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(n_client_cells, OP_EQ, 1); + + /* 3. Test failure to negotiate a machine due to desync */ + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + /* Free every relay-side machine and install an empty list, so the two + * sides no longer agree on the available machines */ + SMARTLIST_FOREACH(relay_padding_machines, + circpad_machine_t *, + m, tor_free(m->states); tor_free(m)); + smartlist_free(relay_padding_machines); + relay_padding_machines = smartlist_new(); + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* verify echo was sent */ + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 2); + + /* verify no padding was negotiated */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + monotime_disable_test_mocking(); + UNMOCK(node_get_by_id); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + nodes_free(); +} + +/** Simulate extending the client circuit by one hop through mid_relay: fire + * the non-padding cell events for the extend/extended exchange on both + * circuits, advance the mocked clock by 10 msec, append an open hop to the + * client's cpath and signal that a hop was added. padding is stored in the + * first digest byte of the new hop's extend info for the mocked + * node_get_by_id. */ +static void
+simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, + int padding) +{ + char whatevs_key[CPATH_KEY_MATERIAL_LEN]; + char digest[DIGEST_LEN]; + tor_addr_t addr; + + /* Zero the scratch buffers: only digest[0] is assigned below, yet the + * whole digest, the address and the key material are handed to + * extend_info_new() and circuit_init_cpath_crypto() */ + memset(whatevs_key, 0, sizeof(whatevs_key)); + memset(digest, 0, sizeof(digest)); + memset(&addr, 0, sizeof(addr)); + + // Pretend a non-padding cell was sent + circpad_cell_event_nonpadding_sent((circuit_t*)client); + + // Receive extend cell at middle + circpad_cell_event_nonpadding_received((circuit_t*)mid_relay); + + // Advance time a tiny bit so we can calculate an RTT + curr_mocked_time += 10 * NSEC_PER_MSEC; + monotime_coarse_set_mock_time_nsec(curr_mocked_time); + monotime_set_mock_time_nsec(curr_mocked_time); + + // Receive extended cell at middle + circpad_cell_event_nonpadding_sent((circuit_t*)mid_relay); + + // Receive extended cell at first hop + circpad_cell_event_nonpadding_received((circuit_t*)client); + + // Add a hop to cpath + crypt_path_t *hop = tor_malloc_zero(sizeof(crypt_path_t)); + onion_append_to_cpath(&TO_ORIGIN_CIRCUIT(client)->cpath, hop); + + hop->magic = CRYPT_PATH_MAGIC; + hop->state = CPATH_STATE_OPEN; + + // add an extend info to indicate if this node supports padding or not. + // (set the first byte of the digest for our mocked node_get_by_id) + digest[0] = padding; + + hop->extend_info = extend_info_new( + padding ? "padding" : "non-padding", + digest, NULL, NULL, NULL, + &addr, padding); + + circuit_init_cpath_crypto(hop, whatevs_key, sizeof(whatevs_key), 0, 0); + + hop->package_window = circuit_initial_package_window(); + hop->deliver_window = CIRCWINDOW_START; + + // Signal that the hop was added + circpad_machine_event_circ_added_hop(TO_ORIGIN_CIRCUIT(client)); +} + +/** Allocate and return a machine with a start and a burst state. Padding-sent + * events move start to burst and keep burst in burst; the length-count event + * moves burst to END. The burst histogram has three bins with all six tokens + * in bin 0. The caller owns the returned machine. */ +static circpad_machine_t * +helper_create_conditional_machine(void) +{ + circpad_machine_t *ret = tor_malloc_zero(sizeof(circpad_machine_t)); + + /* Start, burst */ + circpad_machine_states_init(ret, 2); + + ret->states[CIRCPAD_STATE_START]. + next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST; + + ret->states[CIRCPAD_STATE_BURST].
+ next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST; + + ret->states[CIRCPAD_STATE_BURST]. + next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END; + + ret->states[CIRCPAD_STATE_BURST].token_removal = + CIRCPAD_TOKEN_REMOVAL_NONE; + + ret->states[CIRCPAD_STATE_BURST].histogram_len = 3; + ret->states[CIRCPAD_STATE_BURST].start_usec = 0; + ret->states[CIRCPAD_STATE_BURST].range_usec = 1000000; + /* Set all three bins explicitly; every token lives in bin 0 */ + ret->states[CIRCPAD_STATE_BURST].histogram[0] = 6; + ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0; + ret->states[CIRCPAD_STATE_BURST].histogram[2] = 0; + ret->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 6; + ret->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 0; + ret->states[CIRCPAD_STATE_BURST].length_includes_nonpadding = 1; + + return ret; +} + +/** Create fresh origin_padding_machines and relay_padding_machines lists and + * fill each with machines number 2 and 3; the two origin-side machines get + * different activation conditions. */ +static void +helper_create_conditional_machines(void) +{ + circpad_machine_t *add = helper_create_conditional_machine(); + origin_padding_machines = smartlist_new(); + relay_padding_machines = smartlist_new(); + + add->machine_num = 2; + add->is_origin_side = 1; + add->should_negotiate_end = 1; + add->target_hopnum = 2; + + /* Let's have this one end after 4 packets */ + add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM; +
add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4; + add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4; + add->states[CIRCPAD_STATE_BURST].max_length = 4; + + /* Origin machine 2: no vanguards required, at least 2 hops */ + add->conditions.requires_vanguards = 0; + add->conditions.min_hops = 2; + add->conditions.state_mask = CIRCPAD_CIRC_BUILDING| + CIRCPAD_CIRC_NO_STREAMS|CIRCPAD_CIRC_HAS_RELAY_EARLY; + add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + + smartlist_add(origin_padding_machines, add); + + add = helper_create_conditional_machine(); + add->machine_num = 3; + add->is_origin_side = 1; + add->should_negotiate_end = 1; + add->target_hopnum = 2; + + /* Let's have this one end after 4 packets */ + add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM; + add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4; + add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4; + add->states[CIRCPAD_STATE_BURST].max_length = 4; + + /* Origin machine 3: requires vanguards and at least 3 hops */ + add->conditions.requires_vanguards = 1; + add->conditions.min_hops = 3; + add->conditions.state_mask = CIRCPAD_CIRC_OPENED| + CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY; + add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL; + smartlist_add(origin_padding_machines, add); + + /* Relay-side machines carrying the same machine numbers (2 and 3) */ + add = helper_create_conditional_machine(); + add->machine_num = 2; + smartlist_add(relay_padding_machines, add); + + add = helper_create_conditional_machine(); + add->machine_num = 3; + smartlist_add(relay_padding_machines, add); +} + +/** Test that the machine attached to a circuit follows the circuit's + * conditions: machine 2 while the circuit is being built, machine 3 once + * its conditions are met, including re-adding a machine after shutdown. */ +void +test_circuitpadding_conditions(void *arg) +{ + /** + * Test plan: + * 0. Make a few origin and relay machines with diff conditions + * * vanguards, purposes, has_opened circs, no relay early + * * Client side should_negotiate_end + * * Length limits + * 1. Test STATE_END transitions + * 2. Test new machine after end with same conditions + * 3. Test new machine due to changed conditions + * * Esp: built event, no relay early, no streams + * XXX: Diff test: + * 1. Test STATE_END with pending timers + * 2. Test marking a circuit before padding callback fires + * 3.
Test freeing a circuit before padding callback fires + */ + (void)arg; + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + nodes_init(); + dummy_channel.cmux = circuitmux_alloc(); + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, + &dummy_channel); + client_side = (circuit_t *)origin_circuit_new(); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); + curr_mocked_time = 1*NSEC_PER_USEC; + + timers_initialize(); + helper_create_conditional_machines(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(node_get_by_id, + node_get_by_id_mock); + + /* Simulate extend. This should result in the original machine getting + * added, since the circuit is not built */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Verify that machine #2 is added */ + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + /* Deliver a padding cell to the client, to trigger burst state */ + circpad_cell_event_padding_sent(client_side); + + /* This should have triggered the length shutdown condition on client.. */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + /* Verify machine is gone from both sides */ + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* Send another event..
verify machine gets re-added properly + * (test race with shutdown) */ + simulate_single_hop_extend(client_side, relay_side, 1); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + TO_ORIGIN_CIRCUIT(client_side)->p_streams = 0; + circpad_machine_event_circ_has_no_streams(TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + /* Now make the circuit opened and send built event */ + TO_ORIGIN_CIRCUIT(client_side)->has_opened = 1; + circpad_machine_event_circ_built(TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + TO_ORIGIN_CIRCUIT(client_side)->remaining_relay_early_cells = 0; + circpad_machine_event_circ_has_no_relay_early( + TO_ORIGIN_CIRCUIT(client_side)); + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2); + + /* Dummy non-NULL pointers: HSLayer2Nodes presumably satisfies the + * requires_vanguards condition of machine 3, and p_streams makes the + * circuit look like it has streams. Both are reset under done: */ + get_options_mutable()->HSLayer2Nodes = (void*)1; + TO_ORIGIN_CIRCUIT(client_side)->p_streams = (void*)1; + circpad_machine_event_circ_has_streams(TO_ORIGIN_CIRCUIT(client_side)); + + /* Verify different machine is added */ + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3); + + /* Hold off on negotiated */ + deliver_negotiated = 0; + + /* Deliver a padding cell to the client, to trigger burst state */ + circpad_cell_event_padding_sent(client_side); + + /* This should have triggered the length shutdown condition on client + * but not the response for the padding machine */ + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL); + + /* Verify machine is gone from the relay (but negotiated not back yet) */ +
tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* Add another hop and verify it's back */ + simulate_single_hop_extend(client_side, relay_side, 1); + + tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3); + tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3); + + tt_ptr_op(client_side->padding_info[0], OP_NE, NULL); + tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL); + + done: + /* Undo the global state this test changed: drop the dummy pointers set + * above, re-enable delivery of negotiated cells, and remove the time and + * function mocks, as the other tests in this file do */ + get_options_mutable()->HSLayer2Nodes = NULL; + TO_ORIGIN_CIRCUIT(client_side)->p_streams = NULL; + deliver_negotiated = 1; + monotime_disable_test_mocking(); + UNMOCK(node_get_by_id); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + /* XXX: The circuits, the cmux, the node list and the padding machine + * lists are still not freed here */ + return; +} + +void +test_circuitpadding_circuitsetup_machine(void *arg) +{ + /** + * Test case plan: + * + * 1. Simulate a normal circuit setup pattern + * a. Application traffic + * + * FIXME: This should focus more on exercising the machine + * features rather than actual traffic patterns. For example, + * test cancellation and bins empty/refill + */ + (void)arg; + + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + + dummy_channel.cmux = circuitmux_alloc(); + client_side = TO_CIRCUIT(origin_circuit_new()); + relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel)); + + relay_side->purpose = CIRCUIT_PURPOSE_OR; + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + + nodes_init(); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); + curr_mocked_time = 1*NSEC_PER_USEC; + + timers_initialize(); + circpad_machines_init(); + + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(node_get_by_id, + node_get_by_id_mock); + + /* Test case #1: Build a 3 hop circuit, then wait and let pad */ + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + tt_int_op(n_client_cells, OP_EQ, 1); + tt_int_op(n_relay_cells, OP_EQ, 1); + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, +
CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->is_padding_timer_scheduled, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 1); + + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 2); + tt_int_op(n_relay_cells, OP_EQ, 2); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 3); + tt_int_op(n_relay_cells, OP_EQ, 2); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 3); + tt_int_op(n_relay_cells, OP_EQ, 3); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 4); + tt_int_op(n_relay_cells, OP_EQ, 3); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 4); + tt_int_op(n_relay_cells, OP_EQ, 4); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + 
OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 5); + tt_int_op(n_relay_cells, OP_EQ, 4); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 5); + tt_int_op(n_relay_cells, OP_EQ, 5); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + timers_advance_and_run(2000); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 5); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + timers_advance_and_run(5000); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 6); + + tt_int_op(client_side->padding_info[0]->current_state, + OP_EQ, CIRCPAD_STATE_END); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + tt_int_op(relay_side->padding_info[0]->current_state, + OP_EQ, CIRCPAD_STATE_GAP); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + + /* Verify we can't schedule padding in END state */ + circpad_decision_t ret = + circpad_machine_schedule_padding(client_side->padding_info[0]); + tt_int_op(ret, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Simulate application traffic */ + circpad_cell_event_nonpadding_sent(client_side); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN); + circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + 
tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + tt_int_op(n_client_cells, OP_EQ, 6); + tt_int_op(n_relay_cells, OP_EQ, 7); + + // Test timer cancellation + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + timers_advance_and_run(5000); + circpad_cell_event_padding_received(client_side); + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(n_client_cells, OP_EQ, 8); + tt_int_op(n_relay_cells, OP_EQ, 8); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + + /* Test timer cancel due to state rules */ + circpad_cell_event_nonpadding_sent(client_side); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_EQ, 0); + circpad_cell_event_padding_received(client_side); + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + + /* Simulate application traffic to cancel timer */ + circpad_cell_event_nonpadding_sent(client_side); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT); + circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN); + circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA, + TO_ORIGIN_CIRCUIT(client_side)->cpath->next); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL); + + tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL); + tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL); + + /* No cells sent, except negotiate end from relay */ + tt_int_op(n_client_cells, OP_EQ, 8); + tt_int_op(n_relay_cells, OP_EQ, 9); + + /* Test mark for close and free */ + simulate_single_hop_extend(client_side, relay_side, 1); + 
simulate_single_hop_extend(client_side, relay_side, 1); + timers_advance_and_run(5000); + circpad_cell_event_padding_received(client_side); + + tt_int_op(n_client_cells, OP_EQ, 10); + tt_int_op(n_relay_cells, OP_EQ, 10); + + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_BURST); + tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ, + CIRCPAD_STATE_GAP); + + tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec, + OP_NE, 0); + circuit_mark_for_close(client_side, END_CIRC_REASON_FLAG_REMOTE); + free_fake_orcirc(relay_side); + timers_advance_and_run(5000); + + /* No cells sent */ + tt_int_op(n_client_cells, OP_EQ, 10); + tt_int_op(n_relay_cells, OP_EQ, 10); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + timers_shutdown(); + monotime_disable_test_mocking(); + UNMOCK(circuit_package_relay_cell); + UNMOCK(circuitmux_attach_circuit); + + return; +} + +/** Helper function: Initializes a padding machine where every state uses the + * uniform probability distribution. 
*/
static void
helper_circpad_circ_distribution_machine_setup(int min, int max)
{
  /* Fill in state IDX of circ_client_machine: on a received non-padding
   * cell it moves to state IDX+1; its iat_dist gets type DIST with
   * param1=min and param2=max; start_usec=min and range_usec=max. */
#define SETUP_DIST_STATE(idx, dist)                                     \
  do {                                                                  \
    circpad_state_t *state_ = &circ_client_machine.states[(idx)];       \
    state_->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = (idx) + 1;      \
    state_->iat_dist.type = (dist);                                     \
    state_->iat_dist.param1 = min;                                      \
    state_->iat_dist.param2 = max;                                      \
    state_->start_usec = min;                                           \
    state_->range_usec = max;                                           \
  } while (0)

  /* Seven states are allocated; states 0..5 are configured below, each with
   * a different distribution type, and state 6 is left exactly as
   * circpad_machine_states_init() set it up. */
  circpad_machine_states_init(&circ_client_machine, 7);

  SETUP_DIST_STATE(0, CIRCPAD_DIST_UNIFORM);
  SETUP_DIST_STATE(1, CIRCPAD_DIST_LOGISTIC);
  SETUP_DIST_STATE(2, CIRCPAD_DIST_LOG_LOGISTIC);
  SETUP_DIST_STATE(3, CIRCPAD_DIST_GEOMETRIC);
  SETUP_DIST_STATE(4, CIRCPAD_DIST_WEIBULL);
  SETUP_DIST_STATE(5, CIRCPAD_DIST_PARETO);

#undef SETUP_DIST_STATE
}

/** Simple test that
the padding delays sampled from a uniform distribution + * actually faill within the uniform distribution range. */ +/* TODO: Upgrade this test so that each state tests a different prob + * distribution */ +static void +test_circuitpadding_sample_distribution(void *arg) +{ + circpad_machineinfo_t *mi; + int n_samples; + int n_states; + + (void) arg; + + /* mock this function so that we dont actually schedule any padding */ + MOCK(circpad_machine_schedule_padding, + circpad_machine_schedule_padding_mock); + + /* Initialize a machine with multiple probability distributions that should + * return values between 0 and 5 */ + circpad_machines_init(); + helper_circpad_circ_distribution_machine_setup(0, 10); + + /* Initialize machine and circuits */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + mi = client_side->padding_info[0]; + + /* For every state, sample a bunch of values from the distribution and ensure + * they fall within range. 
*/ + for (n_states = 0 ; n_states < 6; n_states++) { + /* Make sure we in the right state */ + tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, n_states); + + for (n_samples = 0; n_samples < 100; n_samples++) { + circpad_delay_t delay = circpad_machine_sample_delay(mi); + tt_int_op(delay, OP_GE, 0); + tt_int_op(delay, OP_LE, 10); + } + + /* send a non-padding cell to move to the next machine state */ + circpad_cell_event_nonpadding_received((circuit_t*)client_side); + } + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); + UNMOCK(circpad_machine_schedule_padding); +} + +static circpad_decision_t +circpad_machine_transition_mock(circpad_machineinfo_t *mi, + circpad_event_t event) +{ + (void) mi; + (void) event; + + return CIRCPAD_STATE_UNCHANGED; +} + +/* Test per-machine padding rate limits */ +static void +test_circuitpadding_machine_rate_limiting(void *arg) +{ + (void) arg; + bool retval; + circpad_machineinfo_t *mi; + int i; + + /* Ignore machine transitions for the purposes of this function, we only + * really care about padding counts */ + MOCK(circpad_machine_transition, circpad_machine_transition_mock); + MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock); + + /* Setup machine and circuits */ + client_side = TO_CIRCUIT(origin_circuit_new()); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + helper_create_basic_machine(); + client_side->padding_machine[0] = &circ_client_machine; + client_side->padding_info[0] = + circpad_circuit_machineinfo_new(client_side, 0); + mi = client_side->padding_info[0]; + /* Set up the machine info so that we can get through the basic functions */ + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + + /* First we are going to test the per-machine rate limits */ + circ_client_machine.max_padding_percent = 50; + circ_client_machine.allowed_padding_count = 100; + + /* Check padding limit, should be fine since we haven't sent anything yet. 
*/ + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so + * the rate limit will not trigger */ + for (i=0;i<99;i++) { + circpad_send_padding_cell_for_callback(mi); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Now send another padding cell to pass circpad_global_allowed_cells=100, + and see that the limit will trigger */ + circpad_send_padding_cell_for_callback(mi); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + retval = circpad_machine_schedule_padding(mi); + tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Cover wrap */ + for (;ipadding_sent, OP_EQ, UINT16_MAX/2+1); + + tt_ptr_op(client_side->padding_info[0], OP_EQ, mi); + for (i=0;inonpadding_sent, OP_EQ, UINT16_MAX/2); + tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/4+1); + + done: + free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side)); +} + +/* Test global padding rate limits */ +static void +test_circuitpadding_global_rate_limiting(void *arg) +{ + (void) arg; + bool retval; + circpad_machineinfo_t *mi; + int i; + + /* Ignore machine transitions for the purposes of this function, we only + * really care about padding counts */ + MOCK(circpad_machine_transition, circpad_machine_transition_mock); + MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); + MOCK(circuit_package_relay_cell, + circuit_package_relay_cell_mock); + MOCK(monotime_absolute_usec, mock_monotime_absolute_usec); + + monotime_init(); + monotime_enable_test_mocking(); + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; + timers_initialize(); + + client_side = (circuit_t *)origin_circuit_new(); + client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; + dummy_channel.cmux = circuitmux_alloc(); + + /* Setup machine and circuits 
*/ + relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, &dummy_channel); + relay_side->purpose = CIRCUIT_PURPOSE_OR; + helper_create_basic_machine(); + relay_side->padding_machine[0] = &circ_client_machine; + relay_side->padding_info[0] = + circpad_circuit_machineinfo_new(relay_side, 0); + mi = relay_side->padding_info[0]; + /* Set up the machine info so that we can get through the basic functions */ + mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE; + + simulate_single_hop_extend(client_side, relay_side, 1); + simulate_single_hop_extend(client_side, relay_side, 1); + + /* Now test the global limits by setting up the consensus */ + networkstatus_t vote1; + vote1.net_params = smartlist_new(); + smartlist_split_string(vote1.net_params, + "circpad_global_allowed_cells=100 circpad_global_max_padding_pct=50", + NULL, 0, 0); + /* Register global limits with the padding subsystem */ + circpad_new_consensus_params(&vote1); + + /* Check padding limit, should be fine since we haven't sent anything yet. 
*/ + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so + * the rate limit will not trigger */ + for (i=0;i<99;i++) { + circpad_send_padding_cell_for_callback(mi); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + /* Now send another padding cell to pass circpad_global_allowed_cells=100, + and see that the limit will trigger */ + circpad_send_padding_cell_for_callback(mi); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + retval = circpad_machine_schedule_padding(mi); + tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED); + + /* Now send 92 non-padding cells to get near the + * circpad_global_max_padding_pct=50 limit; in particular with 96 non-padding + * cells, the padding traffic is still 51% of total traffic so limit should + * trigger */ + for (i=0;i<92;i++) { + circpad_cell_event_nonpadding_sent(relay_side); + } + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 1); + + /* Send another non-padding cell to bring the padding traffic to 50% of total + * traffic and get past the limit */ + circpad_cell_event_nonpadding_sent(relay_side); + retval = circpad_machine_reached_padding_limit(mi); + tt_int_op(retval, OP_EQ, 0); + + done: + free_fake_orcirc(relay_side); + circuitmux_detach_all_circuits(dummy_channel.cmux, NULL); + circuitmux_free(dummy_channel.cmux); + SMARTLIST_FOREACH(vote1.net_params, char *, cp, tor_free(cp)); + smartlist_free(vote1.net_params); +} + +#define TEST_CIRCUITPADDING(name, flags) \ + { #name, test_##name, (flags), NULL, NULL } + +struct testcase_t circuitpadding_tests[] = { + //TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, 0), + TEST_CIRCUITPADDING(circuitpadding_tokens, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_negotiation, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_wronghop, TT_FORK), + 
TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_conditions, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_rtt, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_sample_distribution, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_machine_rate_limiting, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_global_rate_limiting, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_lower, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_higher, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_closest_token_removal, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_closest_token_removal_usec, TT_FORK), + TEST_CIRCUITPADDING(circuitpadding_token_removal_exact, TT_FORK), + END_OF_TESTCASES +}; + diff --git a/src/test/test_containers.c b/src/test/test_containers.c index 717eb0892a..ad0edf4aa3 100644 --- a/src/test/test_containers.c +++ b/src/test/test_containers.c @@ -96,6 +96,30 @@ test_container_smartlist_basic(void *arg) tor_free(v555); } +/** Test SMARTLIST_FOREACH_REVERSE_BEGIN loop macro */ +static void +test_container_smartlist_foreach_reverse(void *arg) +{ + smartlist_t *sl = smartlist_new(); + int i; + + (void) arg; + + /* Add integers to smartlist in increasing order */ + for (i=0;i<100;i++) { + smartlist_add(sl, (void*)(uintptr_t)i); + } + + /* Pop them out in reverse and test their value */ + SMARTLIST_FOREACH_REVERSE_BEGIN(sl, void*, k) { + i--; + tt_ptr_op(k, OP_EQ, (void*)(uintptr_t)i); + } SMARTLIST_FOREACH_END(k); + + done: + smartlist_free(sl); +} + /** Run unit tests for smartlist-of-strings functionality. 
*/ static void test_container_smartlist_strings(void *arg) @@ -1281,6 +1305,7 @@ test_container_smartlist_strings_eq(void *arg) struct testcase_t container_tests[] = { CONTAINER_LEGACY(smartlist_basic), CONTAINER_LEGACY(smartlist_strings), + CONTAINER_LEGACY(smartlist_foreach_reverse), CONTAINER_LEGACY(smartlist_overlap), CONTAINER_LEGACY(smartlist_digests), CONTAINER_LEGACY(smartlist_join), -- cgit v1.2.3-54-g00ecf From 8ad497bb578b13c66489843905764a60545e6388 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 23 Oct 2018 21:02:31 +0000 Subject: Config option to specify specific MiddleNodes. Hope is this will make it easier to test on the live tor network. Does not need to be merged if we don't want to, but will come in handy for researchers. Co-authored-by: George Kadianakis --- doc/tor.1.txt | 30 +++++++++++++++++++++++------- src/app/config/config.c | 6 ++++++ src/app/config/or_options_st.h | 3 +++ src/core/or/circuitbuild.c | 19 ++++++++++++++++++- src/feature/nodelist/nodelist.c | 2 +- src/feature/nodelist/routerlist.c | 2 ++ 6 files changed, 53 insertions(+), 9 deletions(-) diff --git a/doc/tor.1.txt b/doc/tor.1.txt index 4ff789a931..455356163c 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -1020,6 +1020,21 @@ The following options are useful only for clients (that is, if The .exit address notation, if enabled via MapAddress, overrides this option. +[[MiddleNodes]] **MiddleNodes** __node__,__node__,__...__:: + A list of identity fingerprints and country codes of nodes + to use for "middle" hops in your normal circuits. + Normal circuits include all circuits except for direct connections + to directory servers. Middle hops are all hops other than exit and entry. + ++ + The HSLayer2Node and HSLayer3Node options override this option for onion + service circuits, if they are set. The vanguards addon will read this + option, and if set, it will set HSLayer2Nodes and HSLayer3Nodes to nodes + from this set. 
++ + The ExcludeNodes option overrides this option: any node listed in both + MiddleNodes and ExcludeNodes is treated as excluded. See + the **ExcludeNodes** option for more information on how to specify nodes. + [[EntryNodes]] **EntryNodes** __node__,__node__,__...__:: A list of identity fingerprints and country codes of nodes to use for the first hop in your normal circuits. @@ -1036,13 +1051,14 @@ The following options are useful only for clients (that is, if If StrictNodes is set to 1, Tor will treat solely the ExcludeNodes option as a requirement to follow for all the circuits you generate, even if doing so will break functionality for you (StrictNodes applies to neither - ExcludeExitNodes nor to ExitNodes). If StrictNodes is set to 0, Tor will - still try to avoid nodes in the ExcludeNodes list, but it will err on the - side of avoiding unexpected errors. Specifically, StrictNodes 0 tells Tor - that it is okay to use an excluded node when it is *necessary* to perform - relay reachability self-tests, connect to a hidden service, provide a - hidden service to a client, fulfill a .exit request, upload directory - information, or download directory information. (Default: 0) + ExcludeExitNodes nor to ExitNodes, nor to MiddleNodes). If StrictNodes + is set to 0, Tor will still try to avoid nodes in the ExcludeNodes list, + but it will err on the side of avoiding unexpected errors. + Specifically, StrictNodes 0 tells Tor that it is okay to use an excluded + node when it is *necessary* to perform relay reachability self-tests, + connect to a hidden service, provide a hidden service to a client, + fulfill a .exit request, upload directory information, or download + directory information. 
(Default: 0) [[FascistFirewall]] **FascistFirewall** **0**|**1**:: If 1, Tor will only create outgoing connections to ORs running on ports diff --git a/src/app/config/config.c b/src/app/config/config.c index 32b487dd24..728b7ff65f 100644 --- a/src/app/config/config.c +++ b/src/app/config/config.c @@ -418,6 +418,10 @@ static config_var_t option_vars_[] = { V(ExcludeExitNodes, ROUTERSET, NULL), OBSOLETE("ExcludeSingleHopRelays"), V(ExitNodes, ROUTERSET, NULL), + /* Researchers need a way to tell their clients to use specific + * middles that they also control, to allow safe live-network + * experimentation with new padding machines. */ + V(MiddleNodes, ROUTERSET, NULL), V(ExitPolicy, LINELIST, NULL), V(ExitPolicyRejectPrivate, BOOL, "1"), V(ExitPolicyRejectLocalInterfaces, BOOL, "0"), @@ -1690,6 +1694,7 @@ options_need_geoip_info(const or_options_t *options, const char **reason_out) int routerset_usage = routerset_needs_geoip(options->EntryNodes) || routerset_needs_geoip(options->ExitNodes) || + routerset_needs_geoip(options->MiddleNodes) || routerset_needs_geoip(options->ExcludeExitNodes) || routerset_needs_geoip(options->ExcludeNodes) || routerset_needs_geoip(options->HSLayer2Nodes) || @@ -2129,6 +2134,7 @@ options_act(const or_options_t *old_options) options->HSLayer2Nodes) || !routerset_equal(old_options->HSLayer3Nodes, options->HSLayer3Nodes) || + !routerset_equal(old_options->MiddleNodes, options->MiddleNodes) || options->StrictNodes != old_options->StrictNodes) { log_info(LD_CIRC, "Changed to using entry guards or bridges, or changed " diff --git a/src/app/config/or_options_st.h b/src/app/config/or_options_st.h index c2bc1079a5..63a17c9771 100644 --- a/src/app/config/or_options_st.h +++ b/src/app/config/or_options_st.h @@ -72,6 +72,9 @@ struct or_options_t { routerset_t *ExitNodes; /**< Structure containing nicknames, digests, * country codes and IP address patterns of ORs to * consider as exits. 
*/ + routerset_t *MiddleNodes; /**< Structure containing nicknames, digests, + * country codes and IP address patterns of ORs to + * consider as middles. */ routerset_t *EntryNodes;/**< Structure containing nicknames, digests, * country codes and IP address patterns of ORs to * consider as entry points. */ diff --git a/src/core/or/circuitbuild.c b/src/core/or/circuitbuild.c index 2d8bc4d4ad..22e4cf96d8 100644 --- a/src/core/or/circuitbuild.c +++ b/src/core/or/circuitbuild.c @@ -2610,7 +2610,24 @@ choose_good_middle_server(uint8_t purpose, return choice; } - choice = router_choose_random_node(excluded, options->ExcludeNodes, flags); + if (options->MiddleNodes) { + smartlist_t *sl = smartlist_new(); + routerset_get_all_nodes(sl, options->MiddleNodes, + options->ExcludeNodes, 1); + + smartlist_subtract(sl, excluded); + + choice = node_sl_choose_by_bandwidth(sl, WEIGHT_FOR_MID); + smartlist_free(sl); + if (choice) { + log_fn(LOG_INFO, LD_CIRC, "Chose fixed middle node: %s", + hex_str(choice->identity, DIGEST_LEN)); + } else { + log_fn(LOG_NOTICE, LD_CIRC, "Restricted middle not available"); + } + } else { + choice = router_choose_random_node(excluded, options->ExcludeNodes, flags); + } smartlist_free(excluded); return choice; } diff --git a/src/feature/nodelist/nodelist.c b/src/feature/nodelist/nodelist.c index 15b3f7b600..33601fe1fa 100644 --- a/src/feature/nodelist/nodelist.c +++ b/src/feature/nodelist/nodelist.c @@ -2350,7 +2350,7 @@ compute_frac_paths_available(const networkstatus_t *consensus, const int authdir = authdir_mode_v3(options); count_usable_descriptors(num_present_out, num_usable_out, - mid, consensus, now, NULL, + mid, consensus, now, options->MiddleNodes, USABLE_DESCRIPTOR_ALL); log_debug(LD_NET, "%s: %d present, %d usable", diff --git a/src/feature/nodelist/routerlist.c b/src/feature/nodelist/routerlist.c index b4d56459df..c8a658414b 100644 --- a/src/feature/nodelist/routerlist.c +++ b/src/feature/nodelist/routerlist.c @@ -3221,6 +3221,8 @@ 
refresh_all_country_info(void) routerset_refresh_countries(options->EntryNodes); if (options->ExitNodes) routerset_refresh_countries(options->ExitNodes); + if (options->MiddleNodes) + routerset_refresh_countries(options->MiddleNodes); if (options->ExcludeNodes) routerset_refresh_countries(options->ExcludeNodes); if (options->ExcludeExitNodes) -- cgit v1.2.3-54-g00ecf From 2ccf3268375cd46e8c948e94ba58e0d2f03fe722 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 27 Nov 2018 01:56:23 +0200 Subject: Implement and test probability distributions used by WTF-PAD. This project introduces the prob_distr.c subsystem which implements all the probability distributions that WTF-PAD needs. It also adds unittests for all of them. Code and tests courtesy of Riastradh. Co-authored-by: Taylor R Campbell Co-authored-by: Mike Perry --- src/core/or/circuitpadding.c | 8 +- src/lib/crypt_ops/crypto_rand.c | 11 + src/lib/crypt_ops/crypto_rand.h | 1 + src/lib/math/.may_include | 2 + src/lib/math/fp.c | 25 + src/lib/math/fp.h | 1 + src/lib/math/include.am | 6 +- src/lib/math/prob_distr.c | 1628 +++++++++++++++++++++++++++++++++++++++ src/lib/math/prob_distr.h | 156 ++++ src/test/include.am | 2 + src/test/prob_distr_mpfr_ref.c | 64 ++ src/test/test.c | 1 + src/test/test.h | 2 + src/test/test_prob_distr.c | 1414 ++++++++++++++++++++++++++++++++++ src/test/test_slow.c | 1 + 15 files changed, 3316 insertions(+), 6 deletions(-) create mode 100644 src/lib/math/prob_distr.c create mode 100644 src/lib/math/prob_distr.h create mode 100644 src/test/prob_distr_mpfr_ref.c create mode 100644 src/test/test_prob_distr.c diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 6cfbf4ba56..a9d927619d 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -516,10 +516,10 @@ circpad_distribution_sample(circpad_distribution_t dist) * param1 is Alpha, param2 is Beta */ return dist.param1 * pow(p/(1.0-p), 1.0/dist.param2); case CIRCPAD_DIST_GEOMETRIC: - p 
= crypto_rand_double(); - /* https://github.com/distributions-io/geometric-quantile/ - * param1 is 'p' (success probability) */ - return ceil(tor_mathlog(1.0-p)/tor_mathlog(1.0-dist.param1)); + { + /* param1 is 'p' (success probability) */ + return geometric_sample(dist.param1); + } case CIRCPAD_DIST_WEIBULL: p = crypto_rand_double(); /* https://en.wikipedia.org/wiki/Weibull_distribution \ diff --git a/src/lib/crypt_ops/crypto_rand.c b/src/lib/crypt_ops/crypto_rand.c index cffd0610f3..7a2c417e5a 100644 --- a/src/lib/crypt_ops/crypto_rand.c +++ b/src/lib/crypt_ops/crypto_rand.c @@ -528,6 +528,17 @@ crypto_rand_unmocked(char *to, size_t n) #endif } +/** + * Draw an unsigned 32-bit integer uniformly at random. + */ +uint32_t +crypto_rand_uint32(void) +{ + uint32_t rand; + crypto_rand((void*)&rand, sizeof(rand)); + return rand; +} + /** * Return a pseudorandom integer, chosen uniformly from the values * between 0 and max-1 inclusive. max must be between 1 and diff --git a/src/lib/crypt_ops/crypto_rand.h b/src/lib/crypt_ops/crypto_rand.h index 0c538d81ac..61fd82c806 100644 --- a/src/lib/crypt_ops/crypto_rand.h +++ b/src/lib/crypt_ops/crypto_rand.h @@ -27,6 +27,7 @@ int crypto_rand_int(unsigned int max); int crypto_rand_int_range(unsigned int min, unsigned int max); uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max); time_t crypto_rand_time_range(time_t min, time_t max); +uint32_t crypto_rand_uint32(void); uint64_t crypto_rand_uint64(uint64_t max); double crypto_rand_double(void); struct tor_weak_rng_t; diff --git a/src/lib/math/.may_include b/src/lib/math/.may_include index 1fd26864dc..f8bc264a5f 100644 --- a/src/lib/math/.may_include +++ b/src/lib/math/.may_include @@ -3,3 +3,5 @@ orconfig.h lib/cc/*.h lib/log/*.h lib/math/*.h +lib/testsupport/*.h +lib/crypt_ops/*.h diff --git a/src/lib/math/fp.c b/src/lib/math/fp.c index d5989db637..57082fa468 100644 --- a/src/lib/math/fp.c +++ b/src/lib/math/fp.c @@ -117,3 +117,28 @@ ENABLE_GCC_WARNING(double-promotion) 
ENABLE_GCC_WARNING(float-conversion) #endif } + +/* isinf() wrapper for tor */ +int +tor_isinf(double x) +{ + /* Same as above, work around the "double promotion" warnings */ +#if defined(MINGW_ANY) && GCC_VERSION >= 409 +#define PROBLEMATIC_FLOAT_CONVERSION_WARNING +DISABLE_GCC_WARNING(float-conversion) +#endif /* defined(MINGW_ANY) && GCC_VERSION >= 409 */ +#if defined(__clang__) +#if __has_warning("-Wdouble-promotion") +#define PROBLEMATIC_DOUBLE_PROMOTION_WARNING +DISABLE_GCC_WARNING(double-promotion) +#endif +#endif /* defined(__clang__) */ + return isinf(x); +#ifdef PROBLEMATIC_DOUBLE_PROMOTION_WARNING +ENABLE_GCC_WARNING(double-promotion) +#endif +#ifdef PROBLEMATIC_FLOAT_CONVERSION_WARNING +ENABLE_GCC_WARNING(float-conversion) +#endif +} + diff --git a/src/lib/math/fp.h b/src/lib/math/fp.h index e27b8f8d80..ddf3ed24d6 100644 --- a/src/lib/math/fp.h +++ b/src/lib/math/fp.h @@ -19,5 +19,6 @@ double tor_mathlog(double d) ATTR_CONST; long tor_lround(double d) ATTR_CONST; int64_t tor_llround(double d) ATTR_CONST; int64_t clamp_double_to_int64(double number); +int tor_isinf(double x); #endif diff --git a/src/lib/math/include.am b/src/lib/math/include.am index b088b3f3cc..6d65ce90a7 100644 --- a/src/lib/math/include.am +++ b/src/lib/math/include.am @@ -7,7 +7,8 @@ endif src_lib_libtor_math_a_SOURCES = \ src/lib/math/fp.c \ - src/lib/math/laplace.c + src/lib/math/laplace.c \ + src/lib/math/prob_distr.c src_lib_libtor_math_testing_a_SOURCES = \ @@ -17,4 +18,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS) noinst_HEADERS += \ src/lib/math/fp.h \ - src/lib/math/laplace.h + src/lib/math/laplace.h \ + src/lib/math/prob_distr.h diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c new file mode 100644 index 0000000000..832d3b4d96 --- /dev/null +++ b/src/lib/math/prob_distr.c @@ -0,0 +1,1628 @@ +/* Copyright (c) 2018, The Tor Project, Inc. 
*/ +/* See LICENSE for licensing information */ + +/** + * \file prob_distr.c + * + * \brief + * Implements various probability distributions. + * Almost all code is courtesy of Riastradh. + * + * \details + * Here are some details that might help you understand this file: + * + * - Throughout this file, `eps' means the largest relative error of a + * correctly rounded floating-point operation, which in binary64 + * floating-point arithmetic is 2^-53. Here the relative error of a + * true value x from a computed value y is |x - y|/|x|. This + * definition of epsilon is conventional for numerical analysts when + * writing error analyses. (If your libm doesn't provide correctly + * rounded exp and log, their relative error is usually below 2*2^-53 + * and probably closer to 1.1*2^-53 instead.) + * + * The C constant DBL_EPSILON is actually twice this, and should + * perhaps rather be named ulp(1) -- that is, it is the distance from + * 1 to the next greater floating-point number, which is usually of + * more interest to programmers and hardware engineers. + * + * Since this file is concerned mainly with error bounds rather than + * with low-level bit-hacking of floating-point numbers, we adopt the + * numerical analysts' definition in the comments, though we do use + * DBL_EPSILON in a handful of places where it is convenient to use + * some function of eps = DBL_EPSILON/2 in a case analysis. + * + * - In various functions (e.g. sample_log_logistic()) we jump through hoops so + * that we can use reals closer to 0 than closer to 1, since we achieve much + * greater accuracy for floating point numbers near 0. In particular, we can + * represent differences as small as 10^-300 for numbers near 0, but of no + * less than 10^-16 for numbers near 1. 
+ **/ + +#define PROB_DISTR_PRIVATE + +#include "orconfig.h" + +#include "lib/math/prob_distr.h" + +#include "lib/crypt_ops/crypto_rand.h" +#include "lib/cc/ctassert.h" + +#include +#include +#include + +/** Validators for downcasting macros below */ +#define validate_container_of(PTR, TYPE, FIELD) \ + (0 * sizeof((PTR) - &((TYPE *)(((char *)(PTR)) - \ + offsetof(TYPE, FIELD)))->FIELD)) +#define validate_const_container_of(PTR, TYPE, FIELD) \ + (0 * sizeof((PTR) - &((const TYPE *)(((const char *)(PTR)) - \ + offsetof(TYPE, FIELD)))->FIELD)) +/** Downcasting macro */ +#define container_of(PTR, TYPE, FIELD) \ + ((TYPE *)(((char *)(PTR)) - offsetof(TYPE, FIELD)) \ + + validate_container_of(PTR, TYPE, FIELD)) +/** Constified downcasting macro */ +#define const_container_of(PTR, TYPE, FIELD) \ + ((const TYPE *)(((const char *)(PTR)) - offsetof(TYPE, FIELD)) \ + + validate_const_container_of(PTR, TYPE, FIELD)) + +/** + * Count number of one bits in 32-bit word. + */ +static unsigned +bitcount32(uint32_t x) +{ + + /* Count two-bit groups. */ + x -= (x >> 1) & UINT32_C(0x55555555); + + /* Count four-bit groups. */ + x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333)); + + /* Count eight-bit groups. */ + x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f); + + /* Sum all eight-bit groups, and extract the sum. */ + return (x * UINT32_C(0x01010101)) >> 24; +} + +/** + * Count leading zeros in 32-bit word. + */ +static unsigned +clz32(uint32_t x) +{ + + /* Round up to a power of two. */ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + /* Subtract count of one bits from 32. */ + return (32 - bitcount32(x)); +} + +/* + * Some lemmas that will be used throughout this file to prove various error + * bounds: + * + * Lemma 1. If |d| <= 1/2, then 1/(1 + d) <= 2. + * + * Proof. If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1. + * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2. QED. + * + * Lemma 2. 
If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b, + * then b = a*(1 + e) for |e| <= 2|d' - d|. + * + * Proof. |a - b|/|a| + * = |a - a*(1 + d)/(1 + d')|/|a| + * = |1 - (1 + d)/(1 + d')| + * = |(1 + d' - 1 - d)/(1 + d')| + * = |(d' - d)/(1 + d')| + * <= 2|d' - d|, by Lemma 1, + * + * QED. + * + * Lemma 3. For |d|, |d'| < 1/4, + * + * |log((1 + d)/(1 + d'))| <= 4|d - d'|. + * + * Proof. Write + * + * log((1 + d)/(1 + d')) + * = log(1 + (1 + d)/(1 + d') - 1) + * = log(1 + (1 + d - 1 - d')/(1 + d')) + * = log(1 + (d - d')/(1 + d')). + * + * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor + * series of log(1 + x) converges absolutely for (d - d')/(1 + d'), + * and thus we have + * + * |log(1 + (d - d')/(1 + d'))| + * = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n| + * <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n + * <= \sum_{n=1}^\infty |2(d' - d)|^n/n + * <= \sum_{n=1}^\infty |2(d' - d)|^n + * = |2(d' - d)|/(1 - |2(d' - d)|) + * <= 4|d' - d|, provided |d' - d| <= 1/4, + * + * QED. + * + * Lemma 4. If 1/e <= 1 + x <= e, then + * + * log(1 + (1 + d) x) = (1 + d') log(1 + x) + * + * for |d'| < 8|d|. + * + * Proof. Write + * + * log(1 + (1 + d) x) + * = log(1 + x + x*d) + * = log((1 + x) (1 + x + x*d)/(1 + x)) + * = log(1 + x) + log((1 + x + x*d)/(1 + x)) + * = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)). + * + * The relative error is bounded by + * + * |log((1 + x + x*d)/(1 + x))/log(1 + x)| + * <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3, + * = 4|x*d|/|log(1 + x)| + * < 8|d|, + * + * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2. QED. + */ + +/** + * Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x). + * Maps a log-odds-space probability in [-\infty, +\infty] into a direct-space + * probability in [0,1]. Inverse of logit. + * + * Ill-conditioned for large x; the identity logistic(-x) = 1 - + * logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may + * help to rearrange a computation.
+ * + * This implementation gives relative error bounded by 7 eps. + */ +STATIC double +logistic(double x) +{ + if (x <= log(DBL_EPSILON/2)) { + /* + * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case + * we will approximate the logistic() function with e^x because the + * relative error is less than eps. Here is a calculation of the + * relative error between the logistic() function and e^x and a proof + * that it's less than eps: + * + * |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)| + * <= |1 - 1/(1 + e^x)|*|1 + e^x| + * = |e^x/(1 + e^x)|*|1 + e^x| + * = |e^x| + * <= eps. + */ + return exp(x); /* return e^x */ + } else if (x <= -log(DBL_EPSILON/2)) { + /* + * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 + + * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we + * also have 0 < 1/(1 + e^{-x}) < 1. Thus, if exp has + * relative error d0, + has relative error d1, and / + * has relative error d2, then we get + * + * (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)] + * = (1 + d0)/[1 + e^{-x} + d0 e^{-x} + * + d1 + d1 e^{-x} + d0 d1 e^{-x}] + * = (1 + d0)/[(1 + e^{-x}) + * * (1 + d0 e^{-x}/(1 + e^{-x}) + * + d1/(1 + e^{-x}) + * + d0 d1 e^{-x}/(1 + e^{-x}))]. + * = (1 + d0)/[(1 + e^{-x})(1 + d')] + * = [1/(1 + e^{-x})] (1 + d0)/(1 + d') + * + * where + * + * d' = d0 e^{-x}/(1 + e^{-x}) + * + d1/(1 + e^{-x}) + * + d0 d1 e^{-x}/(1 + e^{-x}). + * + * By Lemma 2 this relative error is bounded by + * + * 2|d0 - d'| + * = 2|d0 - d0 e^{-x}/(1 + e^{-x}) + * - d1/(1 + e^{-x}) + * - d0 d1 e^{-x}/(1 + e^{-x})| + * <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})| + * + 2|d1/(1 + e^{-x})| + * + 2|d0 d1 e^{-x}/(1 + e^{-x})| + * <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1| + * <= 4|d0| + 2|d1| + 2|d0 d1| + * <= 6 eps + 2 eps^2. + */ + return 1/(1 + exp(-x)); + } else { + /* + * e^{-x} <= eps, so the relative error of 1 from 1/(1 + * + e^{-x}) is + * + * |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})| + * = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})| + * = |e^{-x}| + * <= eps. 
+ * + * This computation avoids an intermediate overflow + * exception, although the effect on the result is + * harmless. + * + * XXX Should maybe raise inexact here. + */ + return 1; + } +} + +/** + * Compute the logit function: log p/(1 - p). Defined on [0,1]. Maps + * a direct-space probability in [0,1] to a log-odds-space probability + * in [-\infty, +\infty]. Inverse of logistic. + * + * Ill-conditioned near 1/2 and 1; the identity logit(1 - p) = + * -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help + * to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)]. + * + * This implementation gives relative error bounded by 10 eps. + */ +STATIC double +logit(double p) +{ + + /* logistic(-1) <= p <= logistic(+1) */ + if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) { + /* + * For inputs near 1/2, we want to compute log1p(near + * 0) rather than log(near 1), so write this as: + * + * log(p/(1 - p)) = -log((1 - p)/p) + * = -log(1 + (1 - p)/p - 1) + * = -log(1 + (1 - p - p)/p) + * = -log(1 + (1 - 2p)/p). + * + * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point + * evaluation of 1 - 2p is exact; the only error arises + * from division and log1p. First, note that if + * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies + * in the bounds of Lemma 4. + * + * If division has relative error d0 and log1p has + * relative error d1, the outcome is + * + * -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p) + * = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p) + * = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p). + * + * where |d'| < 8|d0| by Lemma 4. The relative error + * is then bounded by + * + * |d1 + d' + d1 d'| + * <= |d1| + 8|d0| + 8|d1 d0| + * <= 9 eps + 8 eps^2. + */ + return -log1p((1 - 2*p)/p); + } else { + /* + * For inputs near 0, although 1 - p may be rounded to + * 1, it doesn't matter much because the magnitude of + * the result is so much larger. 
For inputs near 1, we + * can compute 1 - p exactly, although the precision on + * the input is limited so we won't ever get more than + * about 700 for the output. + * + * If - has relative error d0, / has relative error d1, + * and log has relative error d2, then + * + * (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)]) + * = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))] + * = log(p/(1 - p)) + d2 log(p/(1 - p)) + * + (1 + d2) log((1 + d0)/(1 + d1)) + * = log(p/(1 - p))*[1 + d2 + + * + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))] + * + * Since 0 <= p < logistic(-1) or logistic(+1) < p <= + * 1, we have |log(p/(1 - p))| > 1. Hence this error + * is bounded by + * + * |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))| + * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1)) + * / log(p/(1 - p))| + * <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))| + * <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3, + * <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0| + * <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0| + * <= 9 eps + 8 eps^2. + */ + return log(p/(1 - p)); + } +} + +/** + * Compute the logit function, translated in input by 1/2: logithalf(p) + * = logit(1/2 + p). Defined on [-1/2, 1/2]. Inverse of logistichalf. + * + * Ill-conditioned near +/-1/2. If |p0| > 1/2 - 1/(1 + e), it may be + * better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead. + * This implementation gives relative error bounded by 34 eps. 
+ */ +STATIC double +logithalf(double p0) +{ + + if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) { + /* + * logit(1/2 + p0) + * = log((1/2 + p0)/(1 - (1/2 + p0))) + * = log((1/2 + p0)/(1/2 - p0)) + * = log(1 + (1/2 + p0)/(1/2 - p0) - 1) + * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0)) + * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0)) + * = log(1 + 2 p0/(1/2 - p0)) + * + * If the error of subtraction is d0, the error of + * division is d1, and the error of log1p is d2, then + * what we compute is + * + * (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)]) + * = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0)) + * = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0)) + * = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)), + * + * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and + * |d''| < 8|d'| < 32 eps by Lemma 4 since + * + * 1/e <= 1 + 2*p0/(1/2 - p0) <= e + * + * when |p0| <= 1/2 - 1/(1 + e). Hence the relative + * error is bounded by + * + * |d2 + d'' + d2 d''| + * <= |d2| + |d''| + |d2 d''| + * <= |d1| + 32 |d0| + 32 |d1 d0| + * <= 33 eps + 32 eps^2. + */ + return log1p(2*p0/(0.5 - p0)); + } else { + /* + * We have a choice of computing logit(1/2 + p0) or + * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0). It + * doesn't matter which way we do this: either way, + * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference + * are computed exactly. So let's do the one that + * skips the final negation. + * + * The result is + * + * (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)]) + * = (1 + d1) (1 + log((1 + d0)/(1 + d2)) + * / log((1/2 + p0)/(1/2 - p0))) + * * log((1/2 + p0)/(1/2 - p0)) + * = (1 + d') log((1/2 + p0)/(1/2 - p0)) + * = (1 + d') logit(1/2 + p0) + * + * where + * + * d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0) + * + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0). + * + * For |p| > 1/2 - 1/(1 + e), logit(1/2 + p0) > 1. + * Provided |d0|, |d2| < 1/4, by Lemma 3 we have + * + * |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|. 
+ * + * Hence the relative error is bounded by + * + * |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2| + * <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2| + * <= 9 eps + 8 eps^2. + */ + return log((0.5 + p0)/(0.5 - p0)); + } +} + +/* + * The following random_uniform_01 is tailored for IEEE 754 binary64 + * floating-point or smaller. It can be adapted to larger + * floating-point formats like i387 80-bit or IEEE 754 binary128, but + * it may require sampling more bits. + */ +CTASSERT(FLT_RADIX == 2); +CTASSERT(-DBL_MIN_EXP <= 1021); +CTASSERT(DBL_MANT_DIG <= 53); + +/** + * Draw a floating-point number in [0, 1] with uniform distribution. + * + * Note that the probability of returning 0 is less than 2^-1074, so + * callers need not check for it. However, callers that cannot handle + * rounding to 1 must deal with that, because it occurs with + * probability 2^-54, which is small but nonnegligible. + */ +STATIC double +random_uniform_01(void) +{ + uint32_t z, x, hi, lo; + double s; + + /* + * Draw an exponent, geometrically distributed, but give up if + * we get a run of more than 1088 zeros, which really means the + * system is broken. + */ + z = 0; + while ((x = crypto_rand_uint32()) == 0) { + if (z >= 1088) + /* Your bit sampler is broken. Go home. */ + return 0; + z += 32; + } + z += clz32(x); + + /* + * Pick 32-bit halves of an odd normalized significand. + * Picking it odd breaks ties in the subsequent rounding, which + * occur only with measure zero in the uniform distribution on + * [0, 1]. + */ + hi = crypto_rand_uint32() | UINT32_C(0x80000000); + lo = crypto_rand_uint32() | UINT32_C(0x00000001); + + /* Round to nearest scaled significand in [2^63, 2^64]. */ + s = hi*(double)4294967296 + lo; + + /* Rescale into [1/2, 1] and apply exponent in one swell foop. 
*/ + return s * ldexp(1, -(64 + z)); +} + +/*******************************************************************/ + +/* Functions for specific probability distributions start here: */ + +/* + * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty) + * + * This is the uniform distribution on [0,1] mapped into log-odds + * space, scaled by sigma and translated by mu. + * + * pdf(x) = e^{-(x - mu)/sigma} / (sigma (1 + e^{-(x - mu)/sigma})^2) + * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma) + * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma) = logistic(-(x - mu)/sigma) + * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p) + * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p) + */ + +/** + * Compute the CDF of the Logistic(mu, sigma) distribution: the + * logistic function. Well-conditioned for negative inputs and small + * positive inputs; ill-conditioned for large positive inputs. + */ +STATIC double +cdf_logistic(double x, double mu, double sigma) +{ + return logistic((x - mu)/sigma); +} + +/** + * Compute the SF of the Logistic(mu, sigma) distribution: the logistic + * function reflected over the y axis. Well-conditioned for positive + * inputs and small negative inputs; ill-conditioned for large negative + * inputs. + */ +STATIC double +sf_logistic(double x, double mu, double sigma) +{ + return logistic(-(x - mu)/sigma); +} + +/** + * Compute the inverse of the CDF of the Logistic(mu, sigma) + * distribution: the logit function. Well-conditioned near 0; + * ill-conditioned near 1/2 and 1. + */ +STATIC double +icdf_logistic(double p, double mu, double sigma) +{ + return mu + sigma*logit(p); +} + +/** + * Compute the inverse of the SF of the Logistic(mu, sigma) + * distribution: the -logit function. Well-conditioned near 0; + * ill-conditioned near 1/2 and 1.
+ */ +STATIC double +isf_logistic(double p, double mu, double sigma) +{ + return mu - sigma*logit(p); +} + +/* + * LogLogistic(alpha, beta) distribution, supported on (0, +\infty). + * + * This is the uniform distribution on [0,1] mapped into odds space, + * scaled by positive alpha and shaped by positive beta. + * + * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample. + * (Name arises because the pdf has LogLogistic(x; alpha, beta) = + * Logistic(log x; log alpha, 1/beta) and mathematicians got their + * covariance contravariant.) + * + * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2 + * = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} / + * (1 + (x/e^mu)^{1/sigma})^2 + * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma}) + * = 1/(1 + (e^{log x}/e^mu)^{-1/sigma}) + * = 1/(1 + (e^{log x - mu})^{-1/sigma}) + * = 1/(1 + e^{-(log x - mu)/sigma}) + * = logistic((log x - mu)/sigma) + * = logistic((log x - log alpha)/(1/beta)) + * sf(x) = 1 - 1/(1 + (x/alpha)^-beta) + * = (x/alpha)^-beta/(1 + (x/alpha)^-beta) + * = 1/((x/alpha)^beta + 1) + * = 1/(1 + (x/alpha)^beta) + * icdf(p) = alpha (p/(1 - p))^{1/beta} + * = alpha e^{logit(p)/beta} + * = e^{mu + sigma logit(p)} + * isf(p) = alpha ((1 - p)/p)^{1/beta} + * = alpha e^{-logit(p)/beta} + * = e^{mu - sigma logit(p)} + */ + +/** + * Compute the CDF of the LogLogistic(alpha, beta) distribution. + * Well-conditioned for all x and alpha, and the condition number + * + * -beta/[1 + (x/alpha)^{-beta}] + * + * grows linearly with beta. + * + * Loosely, the relative error of this implementation is bounded by + * + * 4 eps + 2 eps^2 + O(beta eps), + * + * so don't bother trying this for beta anywhere near as large as + * 1/eps, around which point it levels off at 1. + */ +STATIC double +cdf_log_logistic(double x, double alpha, double beta) +{ + /* + * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and + * d3, of the final quotient. 
The exponentiation gives + * + * ((1 + d0) x/alpha)^{-beta} + * = (x/alpha)^{-beta} (1 + d0)^{-beta} + * = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1) + * = (x/alpha)^{-beta} (1 + d') + * + * where d' = (1 + d0)^{-beta} - 1. If y = (x/alpha)^{-beta}, + * the denominator is + * + * (1 + d2) (1 + (1 + d1) (1 + d') y) + * = (1 + d2) (1 + y + (d1 + d' + d1 d') y) + * = 1 + y + (1 + d2) (d1 + d' + d1 d') y + * = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y)) + * = (1 + y) (1 + d''), + * + * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y). The + * final result is + * + * (1 + d3) / [(1 + d2) (1 + d'') (1 + y)] + * = (1 + d''') / (1 + y) + * + * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2 + * (which may not be the case for very large beta). This + * relative error is therefore bounded by + * + * |d'''| + * <= 2|d3 - d''| + * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)| + * <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')| + * = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'| + * <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'| + * + 2|d2 d1 d'| + * <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|. + * + * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps, + * until it levels off at 1. + */ + return 1/(1 + pow(x/alpha, -beta)); +} + +/** + * Compute the SF of the LogLogistic(alpha, beta) distribution. + * Well-conditioned for all x and alpha, and the condition number + * + * beta/[1 + (x/alpha)^beta] + * + * grows linearly with beta. + * + * Loosely, the relative error of this implementation is bounded by + * + * 4 eps + 2 eps^2 + O(beta eps) + * + * so don't bother trying this for beta anywhere near as large as + * 1/eps, beyond which point it grows unbounded. + */ +STATIC double +sf_log_logistic(double x, double alpha, double beta) +{ + /* + * The error analysis here is essentially the same as in + * cdf_log_logistic, except that rather than levelling off at + * 1, |(1 + d0)^beta - 1| grows unbounded. 
+ */ + return 1/(1 + pow(x/alpha, beta)); +} + +/** + * Compute the inverse of the CDF of the LogLogistic(alpha, beta) + * distribution. Ill-conditioned for p near 1 and beta near 0 with + * condition number 1/[beta (1 - p)]. + */ +STATIC double +icdf_log_logistic(double p, double alpha, double beta) +{ + return alpha*pow(p/(1 - p), 1/beta); +} + +/** + * Compute the inverse of the SF of the LogLogistic(alpha, beta) + * distribution. Ill-conditioned for p near 1 and for large beta, with + * condition number -1/[beta (1 - p)]. + */ +STATIC double +isf_log_logistic(double p, double alpha, double beta) +{ + return alpha*pow((1 - p)/p, 1/beta); +} + +/* + * Weibull(lambda, k) distribution, supported on (0, +\infty). + * + * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k} + * cdf(x) = 1 - e^{-(x/lambda)^k} + * icdf(p) = lambda * (-log (1 - p))^{1/k} + * sf(x) = e^{-(x/lambda)^k} + * isf(p) = lambda * (-log p)^{1/k} + */ + +/** + * Compute the CDF of the Weibull(lambda, k) distribution. + * Well-conditioned for small x and k, and for large lambda -- + * condition number + * + * -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1] + * + * grows linearly with k, x^k, and lambda^{-k}. + */ +STATIC double +cdf_weibull(double x, double lambda, double k) +{ + return -expm1(-pow(x/lambda, k)); +} + +/** + * Compute the SF of the Weibull(lambda, k) distribution. + * Well-conditioned for small x and k, and for large lambda -- + * condition number + * + * -k (x/lambda)^k + * + * grows linearly with k, x^k, and lambda^{-k}. + */ +STATIC double +sf_weibull(double x, double lambda, double k) +{ + return exp(-pow(x/lambda, k)); +} + +/** + * Compute the inverse of the CDF of the Weibull(lambda, k) + * distribution. Ill-conditioned for p near 1, and for k near 0; + * condition number is + * + * (p/(1 - p))/(k log(1 - p)). 
+ */ +STATIC double +icdf_weibull(double p, double lambda, double k) +{ + return lambda*pow(-log1p(-p), 1/k); +} + +/** + * Compute the inverse of the SF of the Weibull(lambda, k) + * distribution. Ill-conditioned for p near 0, and for k near 0; + * condition number is + * + * 1/(k log(p)). + */ +STATIC double +isf_weibull(double p, double lambda, double k) +{ + return lambda*pow(-log(p), 1/k); +} + +/* + * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for + * nonnegative xi, or (mu, mu - sigma/xi) for negative xi. + * + * Samples: + * = mu - sigma log U, if xi = 0; + * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0, + * where U is uniform on (0,1]. + * = mu + sigma (e^{xi X} - 1)/xi, + * where X has standard exponential distribution. + * + * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)} + * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi} + * = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi} + * --> 1 - e^{-(x - mu)/sigma} as xi --> 0 + * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi} + * --> e^{-(x - mu)/sigma} as xi --> 0 + * icdf(p) = mu + sigma*((1 - p)^{-xi} - 1)/xi + * = mu + sigma*expm1(-xi log1p(-p))/xi + * --> mu - sigma*log1p(-p) as xi --> 0 + * isf(p) = mu + sigma*(p^{-xi} - 1)/xi + * = mu + sigma*expm1(-xi log p)/xi + * --> mu - sigma*log p as xi --> 0 + */ + +/** + * Compute the CDF of the GeneralizedPareto(mu, sigma, xi) + * distribution. Well-conditioned everywhere. For standard + * distribution (mu=0, sigma=1), condition number + * + * (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1) + * + * is bounded by 1, attained only at x = 0.
+ */ +STATIC double +cdf_genpareto(double x, double mu, double sigma, double xi) +{ + double x_0 = (x - mu)/sigma; + + /* + * log(1 + xi x_0)/xi + * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n + * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n) + * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n + * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n + * = x_0 (1 - d), + * + * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n. If |xi| < + * eps/4|x_0|, then + * + * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n + * <= \sum_{n=2}^\infty (eps/4)^{n-1} + * = \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2 + * + * for any 0 < eps < 2. Thus, the relative error of x_0 from + * log(1 + xi x_0)/xi is bounded by eps. + */ + if (fabs(xi) < 1e-17/x_0) + return -expm1(-x_0); + else + return -expm1(-log1p(xi*x_0)/xi); +} + +/** + * Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution. + * For standard distribution (mu=0, sigma=1), ill-conditioned for xi + * near 0; condition number + * + * -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi} + * = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi} + * = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi} + * = -x/(1 + x xi) + * + * is bounded by 1/xi. + */ +STATIC double +sf_genpareto(double x, double mu, double sigma, double xi) +{ + double x_0 = (x - mu)/sigma; + + if (fabs(xi) < 1e-17/x_0) + return exp(-x_0); + else + return exp(-log1p(xi*x_0)/xi); +} + +/** + * Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma, + * xi) distribution. Ill-conditioned for p near 1; condition number is + * + * xi (p/(1 - p))/(1 - (1 - p)^xi) + */ +STATIC double +icdf_genpareto(double p, double mu, double sigma, double xi) +{ + /* + * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi + * for xi near zero (note f(xi) --> -log U as xi --> 0), write + * the absolutely convergent Taylor expansion + * + * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n! 
+ * = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n! + * = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n! + * = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n! + * = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!). + * + * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!. What do we + * lose if we discard it and use -log U as an approximation to + * f(xi)? If |xi| < eps/-4log U, then + * + * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n! + * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n! + * <= \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2, + * + * for any 0 < eps < 2. Hence, as long as |xi| < eps/-4log U, + * f(xi) = -log U (1 + d) for |d| <= eps/2. |d| is the + * relative error of f(xi) from -log U; from this bound, the + * relative error of -log U from f(xi) is at most (eps/2)/(1 - + * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 < + * eps < 1. Since -log U < 1000 for all U in (0, 1] in + * binary64 floating-point, we can safely cut xi off at 1e-20 < + * eps/4000 and attain <1ulp error from series truncation. + */ + if (fabs(xi) <= 1e-20) + return mu - sigma*log1p(-p); + else + return mu + sigma*expm1(-xi*log1p(-p))/xi; +} + +/** + * Compute the inverse of the SF of the GeneralizedPareto(mu, sigma, + * xi) distribution. Ill-conditioned for p near 1; condition number is + * + * -xi/(1 - p^{-xi}) + */ +STATIC double +isf_genpareto(double p, double mu, double sigma, double xi) +{ + if (fabs(xi) <= 1e-20) + return mu - sigma*log(p); + else + return mu + sigma*expm1(-xi*log(p))/xi; +} + +/*******************************************************************/ + +/** + * Deterministic samplers, parametrized by uniform integer and (0,1] + * samples. No guarantees are made about _which_ mapping from the + * integer and (0,1] samples these use; all that is guaranteed is the + * distribution of the outputs conditioned on a uniform distribution on + * the inputs.
The automatic tests in test_prob_distr.c double-check + * the particular mappings we use. + * + * Beware: Unlike random_uniform_01(), these are not guaranteed to be + * supported on all possible outputs. See Ilya Mironov, `On the + * Significance of the Least Significant Bits for Differential + * Privacy', for an example of what can go wrong if you try to use + * these to conceal information from an adversary but you expose the + * specific full-precision floating-point values. + * + * Note: None of these samplers use rejection sampling; they are all + * essentially inverse-CDF transforms with tweaks. If you were to add, + * say, a Gamma sampler with the Marsaglia-Tsang method, you would have + * to parametrize it by a potentially infinite stream of uniform (and + * perhaps normal) samples rather than a fixed number, which doesn't + * make for quite as nice automatic testing as for these. + */ + +/** + * Deterministically sample from the interval [a, b], indexed by a + * uniform random floating-point number p0 in (0, 1]. + * + * Note that even if p0 is nonzero, the result may be equal to a, if + * ulp(a)/2 is nonnegligible, e.g. if a = 1. For maximum resolution, + * arrange |a| <= |b|. + */ +STATIC double +sample_uniform_interval(double p0, double a, double b) +{ + /* + * XXX Prove that the distribution is, in fact, uniform on + * [a,b], particularly around p0 = 1, or at least has very + * small deviation from uniform, quantified appropriately + * (e.g., like in Monahan 1984, or by KL divergence). It + * almost certainly does but it would be nice to quantify the + * error. + */ + if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) { + /* + * When ab < 0, (1 - t) a + t b is monotonic, since for + * a <= b it is a sum of nondecreasing functions of t, + * and for b <= a, of nonincreasing functions of t. + * Further, clearly at 0 and 1 it attains a and b, + * respectively. Hence it is bounded within [a, b]. 
+ */ + return (1 - p0)*a + p0*b; + } else { + /* + * a + (b - a) t is monotonic -- it is obviously a + * nondecreasing function of t for a <= b. Further, it + * attains a at 0, and while it may overshoot b at 1, + * we have a + * + * Theorem. If 0 <= t < 1, then the floating-point + * evaluation of a + (b - a) t is bounded in [a, b]. + * + * Lemma 1. If 0 <= t < 1 is a floating-point number, + * then for any normal floating-point number x except + * the smallest in magnitude, |round(x*t)| < |x|. + * + * Proof. WLOG, assume x >= 0. Since the rounding + * function and t |---> x*t are nondecreasing, their + * composition t |---> round(x*t) is also + * nondecreasing, so it suffices to consider the + * largest floating-point number below 1, in particular + * t = 1 - ulp(1)/2. + * + * Case I: If x is a power of two, then the next + * floating-point number below x is x - ulp(x)/2 = x - + * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t + * is a floating-point number, multiplication is exact, + * and thus round(x*t) = x*t < x. + * + * Case II: If x is not a power of two, then the + * greatest lower bound of real numbers rounded to x is + * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2, + * where T(x) is the largest power of two below x. + * Anything below this bound is rounded to a + * floating-point number smaller than x, and x*t = x*(1 + * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2 + * since T(x) < x, so round(x*t) < x*t < x. QED. + * + * Lemma 2. If x and y are subnormal, then round(x + + * y) = x + y. + * + * Proof. It is a matter of adding the significands, + * since if we treat subnormals as having an implicit + * zero bit before the `binary' point, their exponents + * are all the same. There is at most one carry/borrow + * bit, which can always be accommodated either in a + * subnormal, or, at largest, in the implicit one bit + * of a normal. + * + * Lemma 3. Let x and y be floating-point numbers.
If + * round(x - y) is subnormal or zero, then it is equal + * to x - y. + * + * Proof. Case I (equal): round(x - y) = 0 iff x = y; + * hence if round(x - y) = 0, then round(x - y) = 0 = x + * - y. + * + * Case II (subnormal/subnormal): If x and y are both + * subnormal, this follows directly from Lemma 2. + * + * Case IIIa (normal/subnormal): If x is normal and y + * is subnormal, then x and y must share sign, or else + * x - y would be larger than x and thus rounded to + * normal. If s is the smallest normal positive + * floating-point number, |x| < 2s since by + * construction 2s - |y| is normal for all subnormal y. + * This means that x and y must have the same exponent, + * so the difference is the difference of significands, + * which is exact. + * + * Case IIIb (subnormal/normal): Same as case IIIa for + * -(y - x). + * + * Case IV (normal/normal): If x and y are both normal, + * then they must share sign, or else x - y would be + * larger than x and thus rounded to normal. Note that + * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <= + * -|x| but -|x| is normal like x. Also, |x|/2 < |y|: + * if |x|/2 is subnormal, it must hold because y is + * normal; if |x|/2 is normal, then |x|/2 >= s, so + * since |x| - |y| < s, + * + * |x|/2 = |x| - |x|/2 <= |x| - s <= |y|; + * + * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz + * lemma, round(x - y) = x - y. QED. + * + * Proof of theorem. WLOG, assume 0 <= a <= b. Since + * round(a + round(round(b - a)*t) is nondecreasing in + * t and attains a at 0, the lower end of the bound is + * trivial; we must show the upper end of the bound + * strictly. It suffices to show this for the largest + * floating-point number below 1, namely 1 - ulp(1)/2. + * + * Case I: round(b - a) is normal. Then it is at most + * the smallest floating-point number above b - a. By + * Lemma 1, round(round(b - a)*t) < round(b - a). 
+ * Since the inequality is strict, and since + * round(round(b - a)*t) is a floating-point number + * below round(b - a), and since there are no + * floating-point numbers between b - a and round(b - + * a), we must have round(round(b - a)*t) < b - a. + * Then since y |---> round(a + y) is nondecreasing, we + * must have + * + * round(a + round(round(b - a)*t)) + * <= round(a + (b - a)) + * = round(b) = b. + * + * Case II: round(b - a) is subnormal. In this case, + * Lemma 1 falls apart -- we are not guaranteed the + * strict inequality. However, by Lemma 3, the + * difference is exact: round(b - a) = b - a. Thus, + * + * round(a + round(round(b - a)*t)) + * <= round(a + round((b - a)*t)) + * <= round(a + (b - a)) + * = round(b) + * = b, + * + * QED. + */ + + /* p0 is restricted to [0,1], but we use >= to silence -Wfloat-equal. */ + if (p0 >= 1) + return b; + return a + (b - a)*p0; + } +} + +/** + * Deterministically sample from the standard logistic distribution, + * indexed by a uniform random 32-bit integer s and uniform random + * floating-point numbers t and p0 in (0, 1]. + */ +STATIC double +sample_logistic(uint32_t s, double t, double p0) +{ + double sign = (s & 1) ? -1 : +1; + double r; + + /* + * We carve up the interval (0, 1) into subregions to compute + * the inverse CDF precisely: + * + * A = (0, 1/(1 + e)] ---> (-\infty, -1] + * B = [1/(1 + e), 1/2] ---> [-1, 0] + * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1] + * D = [1 - 1/(1 + e), 1) ---> [1, +\infty) + * + * Cases D and C are mirror images of cases A and B, + * respectively, so we choose between them by the sign chosen + * by a fair coin toss. We choose between cases A and B by a + * coin toss weighted by + * + * 2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2): + * + * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)] + * sample p; if it comes up tails, scale p0 into a uniform (0, + * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p = + * 1/2 - p0. 
+ */ + if (t <= 2/(1 + exp(1))) { + /* p uniform in (0, 1/(1 + e)], represented by p. */ + p0 /= 1 + exp(1); + r = logit(p0); + } else { + /* + * p uniform in [1/(1 + e), 1/2), actually represented + * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so + * that p = 1/2 - p0. + */ + p0 *= 0.5 - 1/(1 + exp(1)); + r = logithalf(p0); + } + + /* + * We have chosen from the negative half of the standard + * logistic distribution, which is symmetric with the positive + * half. Now use the sign to choose uniformly between them. + */ + return sign*r; +} + +/** + * Deterministically sample from the logistic distribution scaled by + * sigma and translated by mu. + */ +static double +sample_logistic_locscale(uint32_t s, double t, double p0, double mu, + double sigma) +{ + + return mu + sigma*sample_logistic(s, t, p0); +} + +/** + * Deterministically sample from the standard log-logistic + * distribution, indexed by a uniform random 32-bit integer s and a + * uniform random floating-point number p0 in (0, 1]. + */ +STATIC double +sample_log_logistic(uint32_t s, double p0) +{ + + /* + * Carve up the interval (0, 1) into (0, 1/2] and [1/2, 1); the + * condition numbers of the icdf and the isf coincide at 1/2. + */ + p0 *= 0.5; + if ((s & 1) == 0) { + /* p = p0 in (0, 1/2] */ + return p0/(1 - p0); + } else { + /* p = 1 - p0 in [1/2, 1) */ + return (1 - p0)/p0; + } +} + +/** + * Deterministically sample from the log-logistic distribution with + * scale alpha and shape beta. + */ +static double +sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha, + double beta) +{ + double x = sample_log_logistic(s, p0); + + return alpha*pow(x, 1/beta); +} + +/** + * Deterministically sample from the standard exponential distribution, + * indexed by a uniform random 32-bit integer s and a uniform random + * floating-point number p0 in (0, 1].
+ */ +static double +sample_exponential(uint32_t s, double p0) +{ + /* + * We would like to evaluate log(p) for p near 0, and log1p(-p) + * for p near 1. Simply carve the interval into (0, 1/2] and + * [1/2, 1) by a fair coin toss. + */ + p0 *= 0.5; + if ((s & 1) == 0) + /* p = p0 in (0, 1/2] */ + return -log(p0); + else + /* p = 1 - p0 in [1/2, 1) */ + return -log1p(-p0); +} + +/** + * Deterministically sample from a Weibull distribution with scale + * lambda and shape k -- just an exponential with a shape parameter in + * addition to a scale parameter. (Yes, lambda really is the scale, + * _not_ the rate.) + */ +STATIC double +sample_weibull(uint32_t s, double p0, double lambda, double k) +{ + + return lambda*pow(sample_exponential(s, p0), 1/k); +} + +/** + * Deterministically sample from the generalized Pareto distribution + * with shape xi, indexed by a uniform random 32-bit integer s and a + * uniform random floating-point number p0 in (0, 1]. + */ +STATIC double +sample_genpareto(uint32_t s, double p0, double xi) +{ + double x = sample_exponential(s, p0); + + /* + * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the + * absolutely convergent Taylor series + * + * f(xi) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!) + * = x + (1/xi) \sum_{n=2}^\infty (xi x)^n/n! + * = x + \sum_{n=2}^\infty xi^{n-1} x^n/n! + * = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n! + * = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!). + * + * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error + * of f(xi) from x. If |xi| < eps/4x, then + * + * |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n! + * <= \sum_{n=2}^\infty (eps/4)^{n-1}/n! + * <= \sum_{n=1}^\infty (eps/4)^n + * = (eps/4) \sum_{n=0}^\infty (eps/4)^n + * = (eps/4)/(1 - eps/4) + * < eps/2, + * + * for any 0 < eps < 2. Hence, as long as |xi| < eps/4x, f(xi) + * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'| + * <= eps. What bound should we use for x?
+ * + * - If x is exponentially distributed, x > 200 with + * probability below e^{-200} << 2^{-256}, i.e. never. + * + * - If x is computed by -log(U) for U in (0, 1], x is + * guaranteed to be below 1000 in IEEE 754 binary64 + * floating-point. + * + * We can safely cut xi off at 1e-20 < eps/4000 and attain an + * error bounded by 0.5 ulp for this expression. + */ + return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi); +} + +/** + * Deterministically sample from a generalized Pareto distribution with + * shape xi, scaled by sigma and translated by mu. + */ +static double +sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma, + double xi) +{ + + return mu + sigma*sample_genpareto(s, p0, xi); +} + +/** + * Deterministically sample from the geometric distribution with + * per-trial success probability p. + * + * XXX Quantify the error (KL divergence?) of this + * ceiling-of-exponential sampler from a true geometric distribution, + * which we could get by rejection sampling. Relevant papers: + * + * John F. Monahan, `Accuracy in Random Number Generation', + * Mathematics of Computation 45(172), October 1984, pp. 559--568. +*https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf + * + * Karl Bringmann and Tobias Friedrich, `Exact and Efficient + * Generation of Geometric Random Variates and Random Graphs', in + * Proceedings of the 40th International Colloquium on Automata, + * Languages, and Programming -- ICALP 2013, Springer LNCS 7965, + * pp. 267--278.
+ * https://doi.org/10.1007/978-3-642-39206-1_23 + * https://people.mpi-inf.mpg.de/~kbringma/paper/2013ICALP-1.pdf + */ +static double +sample_geometric(uint32_t s, double p0, double p) +{ + double x = sample_exponential(s, p0); + + /* This is actually a check against 1, but we do >= so that the compiler + does not raise a -Wfloat-equal */ + if (p >= 1) + return 1; + + return (-x/log1p(-p)); +} + +/*******************************************************************/ + +/** Public API for probability distributions: + * + * For each probability distribution we define each public function + * (sample/cdf/sf/icdf/isf) as part of its dist_ops structure. + */ + +/** Functions for uniform distribution */ +const struct dist_ops uniform_ops = { + .name = "uniform", + .sample = uniform_sample, + .cdf = uniform_cdf, + .sf = uniform_sf, + .icdf = uniform_icdf, + .isf = uniform_isf, +}; + +double +uniform_sample(const struct dist *dist) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double p0 = random_uniform_01(); + + return sample_uniform_interval(p0, U->a, U->b); +} + +double +uniform_cdf(const struct dist *dist, double x) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + + if (x < U->a) + return 0; + else if (x < U->b) + return (x - U->a)/(U->b - U->a); + else + return 1; +} + +double +uniform_sf(const struct dist *dist, double x) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + + if (x > U->b) + return 0; + else if (x > U->a) + return (U->b - x)/(U->b - U->a); + else + return 1; +} + +double +uniform_icdf(const struct dist *dist, double p) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double w = U->b - U->a; + + return (p < 0.5 ?
(U->a + w*p) : (U->b - w*(1 - p))); +} + +double +uniform_isf(const struct dist *dist, double p) +{ + const struct uniform *U = const_container_of(dist, struct uniform, + base); + double w = U->b - U->a; + + return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p))); +} + +/** Functions for logistic distribution: */ +const struct dist_ops logistic_ops = { + .name = "logistic", + .sample = logistic_sample, + .cdf = logistic_cdf, + .sf = logistic_sf, + .icdf = logistic_icdf, + .isf = logistic_isf, +}; + +double +logistic_sample(const struct dist *dist) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + uint32_t s = crypto_rand_uint32(); + double t = random_uniform_01(); + double p0 = random_uniform_01(); + + return sample_logistic_locscale(s, t, p0, L->mu, L->sigma); +} + +double +logistic_cdf(const struct dist *dist, double x) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return cdf_logistic(x, L->mu, L->sigma); +} + +double +logistic_sf(const struct dist *dist, double x) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return sf_logistic(x, L->mu, L->sigma); +} + +double +logistic_icdf(const struct dist *dist, double p) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return icdf_logistic(p, L->mu, L->sigma); +} + +double +logistic_isf(const struct dist *dist, double p) +{ + const struct logistic *L = const_container_of(dist, struct logistic, + base); + + return isf_logistic(p, L->mu, L->sigma); +} + +/** Functions for log-logistic distribution: */ +const struct dist_ops log_logistic_ops = { + .name = "log logistic", + .sample = log_logistic_sample, + .cdf = log_logistic_cdf, + .sf = log_logistic_sf, + .icdf = log_logistic_icdf, + .isf = log_logistic_isf, +}; + +double +log_logistic_sample(const struct dist *dist) +{ + const struct log_logistic *LL = const_container_of(dist, struct + log_logistic, base); + uint32_t s = 
crypto_rand_uint32(); + double p0 = random_uniform_01(); + + return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta); +} + +double +log_logistic_cdf(const struct dist *dist, double x) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return cdf_log_logistic(x, LL->alpha, LL->beta); +} + +double +log_logistic_sf(const struct dist *dist, double x) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return sf_log_logistic(x, LL->alpha, LL->beta); +} + +double +log_logistic_icdf(const struct dist *dist, double p) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return icdf_log_logistic(p, LL->alpha, LL->beta); +} + +double +log_logistic_isf(const struct dist *dist, double p) +{ + const struct log_logistic *LL = const_container_of(dist, + struct log_logistic, base); + + return isf_log_logistic(p, LL->alpha, LL->beta); +} + +/** Functions for Weibull distribution */ +const struct dist_ops weibull_ops = { + .name = "Weibull", + .sample = weibull_sample, + .cdf = weibull_cdf, + .sf = weibull_sf, + .icdf = weibull_icdf, + .isf = weibull_isf, +}; + +double +weibull_sample(const struct dist *dist) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + uint32_t s = crypto_rand_uint32(); + double p0 = random_uniform_01(); + + return sample_weibull(s, p0, W->lambda, W->k); +} + +double +weibull_cdf(const struct dist *dist, double x) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return cdf_weibull(x, W->lambda, W->k); +} + +double +weibull_sf(const struct dist *dist, double x) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return sf_weibull(x, W->lambda, W->k); +} + +double +weibull_icdf(const struct dist *dist, double p) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return icdf_weibull(p, 
W->lambda, W->k); +} + +double +weibull_isf(const struct dist *dist, double p) +{ + const struct weibull *W = const_container_of(dist, struct weibull, + base); + + return isf_weibull(p, W->lambda, W->k); +} + +/** Functions for generalized Pareto distributions */ +const struct dist_ops genpareto_ops = { + .name = "generalized Pareto", + .sample = genpareto_sample, + .cdf = genpareto_cdf, + .sf = genpareto_sf, + .icdf = genpareto_icdf, + .isf = genpareto_isf, +}; + +double +genpareto_sample(const struct dist *dist) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + uint32_t s = crypto_rand_uint32(); + double p0 = random_uniform_01(); + + return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi); +} + +double +genpareto_cdf(const struct dist *dist, double x) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi); +} + +double +genpareto_sf(const struct dist *dist, double x) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return sf_genpareto(x, GP->mu, GP->sigma, GP->xi); +} + +double +genpareto_icdf(const struct dist *dist, double p) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi); +} + +double +genpareto_isf(const struct dist *dist, double p) +{ + const struct genpareto *GP = const_container_of(dist, struct genpareto, + base); + + return isf_genpareto(p, GP->mu, GP->sigma, GP->xi); +} + +/* Deterministically sample from the geometric distribution with + * per-trial success probability p. 
*/ +double +geometric_sample(double p) +{ + uint32_t s = crypto_rand_uint32(); + double p0 = random_uniform_01(); + return sample_geometric(s, p0, p); +} + diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h new file mode 100644 index 0000000000..c2fd6c74b3 --- /dev/null +++ b/src/lib/math/prob_distr.h @@ -0,0 +1,156 @@ + +/** + * \file prob_distr.h + * + * \brief Header for prob_distr.c + **/ + +#ifndef TOR_PROB_DISTR_H +#define TOR_PROB_DISTR_H + +#include "lib/cc/compat_compiler.h" +#include "lib/cc/torint.h" +#include "lib/testsupport/testsupport.h" + +/** + * Container for distribution parameters for sampling, CDF, &c. + */ +struct dist { + const struct dist_ops *ops; +}; + +#define DIST_BASE(OPS) { .ops = (OPS) } + +struct dist_ops { + const char *name; + double (*sample)(const struct dist *); + double (*cdf)(const struct dist *, double x); + double (*sf)(const struct dist *, double x); + double (*icdf)(const struct dist *, double p); + double (*isf)(const struct dist *, double p); +}; + +/* Geometric distribution */ + +double geometric_sample(double p); + +/* Pareto distribution */ + +struct genpareto { + struct dist base; + double mu; + double sigma; + double xi; +}; + +double genpareto_sample(const struct dist *dist); +double genpareto_cdf(const struct dist *dist, double x); +double genpareto_sf(const struct dist *dist, double x); +double genpareto_icdf(const struct dist *dist, double p); +double genpareto_isf(const struct dist *dist, double p); + +extern const struct dist_ops genpareto_ops; + +/* Weibull distribution */ + +struct weibull { + struct dist base; + double lambda; + double k; +}; + +double weibull_sample(const struct dist *dist); +double weibull_cdf(const struct dist *dist, double x); +double weibull_sf(const struct dist *dist, double x); +double weibull_icdf(const struct dist *dist, double p); +double weibull_isf(const struct dist *dist, double p); + +extern const struct dist_ops weibull_ops; + +/* Log-logistic distribution */ 
+ +struct log_logistic { + struct dist base; + double alpha; + double beta; +}; + +double log_logistic_sample(const struct dist *dist); +double log_logistic_cdf(const struct dist *dist, double x); +double log_logistic_sf(const struct dist *dist, double x); +double log_logistic_icdf(const struct dist *dist, double p); +double log_logistic_isf(const struct dist *dist, double p); + +extern const struct dist_ops log_logistic_ops; + +/* Logistic distribution */ + +struct logistic { + struct dist base; + double mu; + double sigma; +}; + +double logistic_sample(const struct dist *dist); +double logistic_cdf(const struct dist *dist, double x); +double logistic_sf(const struct dist *dist, double x); +double logistic_icdf(const struct dist *dist, double p); +double logistic_isf(const struct dist *dist, double p); + +extern const struct dist_ops logistic_ops; + +/* Uniform distribution */ + +struct uniform { + struct dist base; + double a; + double b; +}; + +double uniform_sample(const struct dist *dist); +double uniform_cdf(const struct dist *dist, double x); +double uniform_sf(const struct dist *dist, double x); +double uniform_icdf(const struct dist *dist, double p); +double uniform_isf(const struct dist *dist, double p); + +extern const struct dist_ops uniform_ops; + +/** Only by unittests */ + +#ifdef PROB_DISTR_PRIVATE + +STATIC double logithalf(double p0); +STATIC double logit(double p); + +STATIC double random_uniform_01(void); + +STATIC double logistic(double x); +STATIC double cdf_logistic(double x, double mu, double sigma); +STATIC double sf_logistic(double x, double mu, double sigma); +STATIC double icdf_logistic(double p, double mu, double sigma); +STATIC double isf_logistic(double p, double mu, double sigma); +STATIC double sample_logistic(uint32_t s, double t, double p0); + +STATIC double cdf_log_logistic(double x, double alpha, double beta); +STATIC double sf_log_logistic(double x, double alpha, double beta); +STATIC double icdf_log_logistic(double p, double 
alpha, double beta); +STATIC double isf_log_logistic(double p, double alpha, double beta); +STATIC double sample_log_logistic(uint32_t s, double p0); + +STATIC double cdf_weibull(double x, double lambda, double k); +STATIC double sf_weibull(double x, double lambda, double k); +STATIC double icdf_weibull(double p, double lambda, double k); +STATIC double isf_weibull(double p, double lambda, double k); +STATIC double sample_weibull(uint32_t s, double p0, double lambda, double k); + +STATIC double sample_uniform_interval(double p0, double a, double b); + +STATIC double cdf_genpareto(double x, double mu, double sigma, double xi); +STATIC double sf_genpareto(double x, double mu, double sigma, double xi); +STATIC double icdf_genpareto(double p, double mu, double sigma, double xi); +STATIC double isf_genpareto(double p, double mu, double sigma, double xi); +STATIC double sample_genpareto(uint32_t s, double p0, double xi); + +#endif + +#endif diff --git a/src/test/include.am b/src/test/include.am index 4da0b84392..b276500fd5 100644 --- a/src/test/include.am +++ b/src/test/include.am @@ -157,6 +157,7 @@ src_test_test_SOURCES += \ src/test/test_periodic_event.c \ src/test/test_policy.c \ src/test/test_process.c \ + src/test/test_prob_distr.c \ src/test/test_procmon.c \ src/test/test_proto_http.c \ src/test/test_proto_misc.c \ @@ -207,6 +208,7 @@ src_test_test_slow_SOURCES += \ src/test/test_slow.c \ src/test/test_crypto_slow.c \ src/test/test_process_slow.c \ + src/test/test_prob_distr.c \ src/test/testing_common.c \ src/test/testing_rsakeys.c \ src/ext/tinytest.c diff --git a/src/test/prob_distr_mpfr_ref.c b/src/test/prob_distr_mpfr_ref.c new file mode 100644 index 0000000000..4e64d731cd --- /dev/null +++ b/src/test/prob_distr_mpfr_ref.c @@ -0,0 +1,64 @@ +/* Copyright 2012-2018, The Tor Project, Inc + * See LICENSE for licensing information */ + +/** prob_distr_mpfr_ref.c + * + * Example reference file for GNU MPFR vectors tested in test_prob_distr.c . 
+ * Code by Riastradh. + */ + +#include +#include +#include +#include + +/* Must come after <stdio.h> so we get mpfr_printf. */ +#include <mpfr.h> + +/* gcc -o mpfr prob_distr_mpfr_ref.c -lmpfr -lm */ + +/* Computes logit(p) for p = .49999 */ +int +main(void) +{ + mpfr_t p, q, r; + mpfr_init(p); + mpfr_set_prec(p, 200); + mpfr_init(q); + mpfr_set_prec(q, 200); + mpfr_init(r); + mpfr_set_prec(r, 200); + mpfr_set_d(p, .49999, MPFR_RNDN); + mpfr_set_d(q, 1, MPFR_RNDN); + /* r := q - p = 1 - p */ + mpfr_sub(r, q, p, MPFR_RNDN); + /* q := p/r = p/(1 - p) */ + mpfr_div(q, p, r, MPFR_RNDN); + /* r := log(q) = log(p/(1 - p)) */ + mpfr_log(r, q, MPFR_RNDN); + mpfr_printf("mpfr 200-bit\t%.128Rg\n", r); + + /* + * Print a double approximation to logit three different ways. All + * three agree bit for bit on the libms I tried, with the nextafter + * adjustment (which is well within the 10 eps relative error bound + * advertised). Apparently I must have used the Goldberg expression + * for what I wrote down in the test case. + */ + printf("mpfr 53-bit\t%.17g\n", nextafter(mpfr_get_d(r, MPFR_RNDN), 0), 0); + volatile double p0 = .49999; + printf("log1p\t\t%.17g\n", nextafter(-log1p((1 - 2*p0)/p0), 0)); + volatile double x = (1 - 2*p0)/p0; + volatile double xp1 = x + 1; + printf("Goldberg\t%.17g\n", -x*log(xp1)/(xp1 - 1)); + + /* + * Print a bad approximation, using the naive expression, to see a + * lot of wrong digits, far beyond the 10 eps relative error attained + * by -log1p((1 - 2*p)/p).
+ */ + printf("naive\t\t%.17g\n", log(p0/(1 - p0))); + + fflush(stdout); + return ferror(stdout); +} diff --git a/src/test/test.c b/src/test/test.c index a0a138b03d..902565dfbe 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -901,6 +901,7 @@ struct testgroup_t testgroups[] = { { "parsecommon/", parsecommon_tests }, { "periodic-event/" , periodic_event_tests }, { "policy/" , policy_tests }, + { "prob_distr/", prob_distr_tests }, { "procmon/", procmon_tests }, { "process/", process_tests }, { "proto/http/", proto_http_tests }, diff --git a/src/test/test.h b/src/test/test.h index 9f6eb0a7e6..39953e9f7e 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -243,6 +243,8 @@ extern struct testcase_t parsecommon_tests[]; extern struct testcase_t pem_tests[]; extern struct testcase_t periodic_event_tests[]; extern struct testcase_t policy_tests[]; +extern struct testcase_t prob_distr_tests[]; +extern struct testcase_t slow_stochastic_prob_distr_tests[]; extern struct testcase_t procmon_tests[]; extern struct testcase_t process_tests[]; extern struct testcase_t proto_http_tests[]; diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c new file mode 100644 index 0000000000..bf0f9e059d --- /dev/null +++ b/src/test/test_prob_distr.c @@ -0,0 +1,1414 @@ +/* Copyright (c) 2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +/** + * \file test_prob_distr.c + * \brief Test probability distributions. + * \detail + * + * For each probability distribution we do two kinds of tests: + * + * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions + * and the various relationships between them for each distribution. We also + * do deterministic tests on their sampling functions. Test vectors for + * these tests were computed from alternative implementations and were + * eyeballed to make sure they make sense + * (e.g. 
src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr + * with 200-bit precision and is then tested in test_logit_logistic()). + * + * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from + * the given distributions is distributed properly. The stochastic tests are + * slow and their false positive rate is not well suited for CI, so they are + * currently disabled-by-default and put into 'tests-slow'. + */ + +#define PROB_DISTR_PRIVATE + +#include "orconfig.h" + +#include "test/test.h" + +#include "core/or/or.h" + +#include "lib/math/prob_distr.h" +#include "lib/math/fp.h" +#include "lib/crypt_ops/crypto_rand.h" + +#include +#include +#include +#include +#include +#include +#include + +/** + * Return floor(d) converted to size_t, as a workaround for complaints + * under -Wbad-function-cast for (size_t)floor(d). + */ +static size_t +floor_to_size_t(double d) +{ + double integral_d = floor(d); + return (size_t)integral_d; +} + +/** + * Return ceil(d) converted to size_t, as a workaround for complaints + * under -Wbad-function-cast for (size_t)ceil(d). + */ +static size_t +ceil_to_size_t(double d) +{ + double integral_d = ceil(d); + return (size_t)integral_d; +} + +/* + * Geometric(p) distribution, supported on {1, 2, 3, ...}. + * + * Compute the probability mass function Geom(n; p) of the number of + * trials before the first success when success has probability p. + */ +static double +logpmf_geometric(unsigned n, double p) +{ + /* This is actually a check against 1, but we do >= so that the compiler + does not raise a -Wfloat-equal */ + if (p >= 1) { + if (n == 1) + return 0; + else + return -HUGE_VAL; + } + return (n - 1)*log1p(-p) + log(p); +} + +/** + * Compute the logistic function, translated in output by 1/2: + * logistichalf(x) = logistic(x) - 1/2. Well-conditioned on the entire + * real plane, with maximum condition number 1 at 0. + * + * This implementation gives relative error bounded by 5 eps. 
+ */ +static double +logistichalf(double x) +{ + /* + * Rewrite this with the identity + * + * 1/(1 + e^{-x}) - 1/2 + * = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x}) + * = (1/2 - e^{-x}/2)/(1 + e^{-x}) + * = (1 - e^{-x})/[2 (1 + e^{-x})] + * = -(e^{-x} - 1)/[2 (1 + e^{-x})], + * + * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))]. + * + * Suppose exp has error d0, + has error d1, expm1 has error + * d2, and / has error d3, so we evaluate + * + * -(1 + d2) (1 + d3) (e^{-x} - 1) + * / [2 (1 + d1) (1 + (1 + d0) e^{-x})]. + * + * In the denominator, + * + * 1 + (1 + d0) e^{-x} + * = 1 + e^{-x} + d0 e^{-x} + * = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})), + * + * so the relative error of the numerator is + * + * d' = d2 + d3 + d2 d3, + * and of the denominator, + * d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x}) + * = d1 + d0 L(-x) + d0 d1 L(-x), + * + * where L(-x) is logistic(-x). By Lemma 1 the relative error + * of the quotient is bounded by + * + * 2|d2 + d3 + d2 d3 - d1 - d0 L(x) + d0 d1 L(x)|, + * + * Since 0 < L(x) < 1, this is bounded by + * + * 2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1| + * <= 4 eps + 2 eps^2. + */ + if (x < log(DBL_EPSILON/8)) { + /* + * Avoid overflow in e^{-x}. When x < log(eps/4), we + * further have x < logit(eps/4), so that + * logistic(x) < eps/4. Hence the relative error of + * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and + * so the relative error of -1/2 from logistic(x) - 1/2 + * is bounded by eps. + */ + return -0.5; + } else { + return -expm1(-x)/(2*(1 + exp(-x))); + } +} + +/** + * Compute the log of the sum of the exps. Caller should arrange the + * array in descending order to minimize error because I don't want to + * deal with using temporary space and the one caller in this file + * arranges that anyway. + * + * Warning: This implementation does not handle infinite or NaN inputs + * sensibly, because I don't need that here at the moment.
(NaN, or + * -inf and +inf together, should yield NaN; +inf and finite should + * yield +inf; otherwise all -inf should be ignored because exp(-inf) = + * 0.) + */ +static double +logsumexp(double *A, size_t n) +{ + double maximum, sum; + size_t i; + + if (n == 0) + return log(0); + + maximum = A[0]; + for (i = 1; i < n; i++) { + if (A[i] > maximum) + maximum = A[i]; + } + + sum = 0; + for (i = n; i --> 0;) + sum += exp(A[i] - maximum); + + return log(sum) + maximum; +} + +/** + * Compute log(1 - e^x). Defined only for negative x so that e^x < 1. + * This is the complement of a probability in log space. + */ +static double +log1mexp(double x) +{ + + /* + * We want to compute log on [0, 1/2) but log1p on [1/2, +inf), + * so partition x at -log(2) = log(1/2). + */ + if (-log(2) < x) + return log(-expm1(x)); + else + return log1p(-exp(x)); +} + +/* + * Tests of numerical errors in computing logit, logistic, and the + * various cdfs, sfs, icdfs, and isfs. + */ + +#define arraycount(A) (sizeof(A)/sizeof(A[0])) + +/** Return relative error between actual and expected. + * Special cases: If expected is zero or infinite, return 0 if + * actual is equal to expected and 1 if not, since the + * usual notion of relative error is undefined but we only use this + * for testing relerr(e, a) <= bound. If either is NaN, return NaN, + * which has the property that NaN <= bound is false no matter what + * bound is. + * + * Beware: if you test !(relerr(e, a) > bound), then the result + * is true when a is NaN because NaN > bound is false too. See + * CHECK_RELERR for correct use to decide when to report failure. + */ +static double +relerr(double expected, double actual) +{ + /* + * To silence -Wfloat-equal, we have to test for equality using + * inequalities: we have (fabs(expected) <= 0) iff (expected == 0), + * and (actual <= expected && actual >= expected) iff actual == + * expected whether expected is zero or infinite.
+ */ + if (fabs(expected) <= 0 || tor_isinf(expected)) { + if (actual <= expected && actual >= expected) + return 0; + else + return 1; + } else { + return fabs((expected - actual)/expected); + } +} + +/** Check that relative error of expected and actual is within + * relerr_bound. Caller must arrange to have i and relerr_bound in + * scope. */ +#define CHECK_RELERR(expected, actual) do { \ + double check_expected = (expected); \ + double check_actual = (actual); \ + const char *str_expected = #expected; \ + const char *str_actual = #actual; \ + double check_relerr = relerr(expected, actual); \ + if (!(relerr(check_expected, check_actual) <= relerr_bound)) { \ + log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)" \ + " = %.17e > %.17e\n", \ + __func__, __LINE__, (unsigned) i, \ + str_expected, check_expected, \ + str_actual, check_actual, \ + check_relerr, relerr_bound); \ + ok = false; \ + } \ +} while (0) + +/* Check that a <= b. + * Caller must arrange to have i in scope. */ +#define CHECK_LE(a, b) do { \ + double check_a = (a); \ + double check_b = (b); \ + const char *str_a = #a; \ + const char *str_b = #b; \ + if (!(check_a <= check_b)) { \ + log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n", \ + __func__, __LINE__, (unsigned) i, \ + str_a, check_a, str_b, check_b); \ + ok = false; \ + } \ +} while (0) + +/** + * Test the logit and logistic functions. Confirm that they agree with + * the cdf, sf, icdf, and isf of the standard Logistic distribution. + * Confirm that the sampler for the standard logistic distribution maps + * [0, 1] into the right subinterval for the inverse transform, for + * this implementation. 
+ */
+static void
+test_logit_logistic(void *arg)
+{
+  (void) arg;
+
+  /* Reference triples (x, p, phalf).  Several rows share the same p but
+   * differ in x and phalf: near p = 1/2 the value p - 1/2 carries more
+   * precision than p itself (see the long comment in the loop below). */
+  static const struct {
+    double x;           /* x = logit(p) */
+    double p;           /* p = logistic(x) */
+    double phalf;       /* p - 1/2 = logistic(x) - 1/2 */
+  } cases[] = {
+    { -HUGE_VAL, 0, -0.5 },
+    { -1000, 0, -0.5 },
+    { -710, 4.47628622567513e-309, -0.5 },
+    { -708, 3.307553003638408e-308, -0.5 },
+    { -2, .11920292202211755, -.3807970779778824 },
+    { -1.0000001, .2689414017088022, -.23105859829119776 },
+    { -1, .2689414213699951, -.23105857863000487 },
+    { -0.9999999, .26894144103118883, -.2310585589688111 },
+    /* see src/test/prob_distr_mpfr_ref.c for computation */
+    { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 },
+    { -4.000000000533334e-5, .49999, -.00001 },
+    { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 },
+    { -4e-9, .499999999, -1e-9 },
+    { -4e-16, .5, -1e-16 },
+    { -4e-300, .5, -1e-300 },
+    { 0, .5, 0 },
+    { 4e-300, .5, 1e-300 },
+    { 4e-16, .5, 1e-16 },
+    { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 },
+    { 4e-9, .500000001, 1e-9 },
+    { 4.0000000005333336e-5, .50001, .00001 },
+    { 8.000042667076272e-3, .502, .002 },
+    { 0.9999999, .7310585589688111, .2310585589688111 },
+    { 1, .7310585786300049, .23105857863000487 },
+    { 1.0000001, .7310585982911977, .23105859829119774 },
+    { 2, .8807970779778823, .3807970779778824 },
+    { 708, 1, .5 },
+    { 710, 1, .5 },
+    { 1000, 1, .5 },
+    { HUGE_VAL, 1, .5 },
+  };
+  /* relerr_bound, i and ok are read or written by the CHECK_* macros. */
+  double relerr_bound = 3e-15; /* >10eps */
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double phalf = cases[i].phalf;
+
+    /*
+     * cdf is logistic, icdf is logit, and symmetry for
+     * sf/isf.
+     */
+    CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1));
+    CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1));
+    CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1));
+    CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1));
+
+    /* Doubling the third argument together with x must give the same
+     * probabilities; the inverse functions must return doubled values. */
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2);
+
+    /* Same again with the third argument halved. */
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2);
+
+    /* Doubling the third argument and shifting the second argument by 1. */
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2));
+
+    /*
+     * For p near 0 and p near 1/2, the arithmetic of
+     * translating by 1 loses precision.
+     */
+    if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) {
+      CHECK_RELERR(icdf_logistic(p, 0, 1),
+                   (icdf_logistic(p, 1, 2) - 1)/2);
+      CHECK_RELERR(isf_logistic(p, 0, 1),
+                   (isf_logistic(p, 1, 2) - 1)/2);
+    }
+
+    /* The table itself: forward direction. */
+    CHECK_RELERR(p, logistic(x));
+    CHECK_RELERR(phalf, logistichalf(x));
+
+    /*
+     * On the interior floating-point numbers, either logit or
+     * logithalf had better give the correct answer.
+     *
+     * For probabilities near 0, we can get much finer resolution with
+     * logit, and for probabilities near 1/2, we can get much finer
+     * resolution with logithalf by representing them using p - 1/2.
+     *
+     * E.g., we can write -.00001 for phalf, and .49999 for p, but the
+     * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64
+     * arithmetic.  So test logit(.49999) which should give the same
+     * answer as logithalf(-1.0000000000010001e-5), namely
+     * -4.000000000537333e-5, and also test logithalf(-.00001) which
+     * gives -4.000000000533334e-5 instead -- but don't expect
+     * logit(.49999) to give -4.000000000533334e-5 even though it looks
+     * like 1/2 - .00001.
+     *
+     * A naive implementation of logit will just use log(p/(1 - p)) and
+     * give the answer -4.000000000551673e-05 for .49999, which is
+     * wrong in a lot of digits, which happens because log is
+     * ill-conditioned near 1 and thus amplifies whatever relative
+     * error we made in computing p/(1 - p).
+     */
+    if ((0 < p && p < 1) || tor_isinf(x)) {
+      /* Each pair of <= / >= below is an exact-equality test: only check
+       * the inverse whose input this row represents exactly. */
+      if (phalf >= p - 0.5 && phalf <= p - 0.5)
+        CHECK_RELERR(x, logit(p));
+      if (p >= 0.5 + phalf && p <= 0.5 + phalf)
+        CHECK_RELERR(x, logithalf(phalf));
+    }
+
+    /* The same checks mirrored through x -> -x. */
+    CHECK_RELERR(-phalf, logistichalf(-x));
+    if (fabs(phalf) < 0.5 || tor_isinf(x))
+      CHECK_RELERR(-x, logithalf(-phalf));
+    if (p < 1 || tor_isinf(x)) {
+      CHECK_RELERR(1 - p, logistic(-x));
+      if (p > .75 || tor_isinf(x))
+        CHECK_RELERR(-x, logit(1 - p));
+    } else {
+      /* p is 1 for a finite x: only require logistic(-x) to be tiny. */
+      CHECK_LE(logistic(-x), 1e-300);
+    }
+  }
+
+  /* Check sample_logistic() against closed forms built from logit and
+   * logithalf, for p0 = 0, 0.01, ..., 1 and each combination of its first
+   * two arguments in {0, 1}. */
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0));
+    CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0));
+    CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))),
+                 sample_logistic(0, 1, p0));
+    CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))),
+                 sample_logistic(1, 1, p0));
+  }
+
+  if (!ok)
+    printf("fail logit/logistic / logistic cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, and isf of the LogLogistic distribution.
+ */ +static void +test_log_logistic(void *arg) +{ + (void) arg; + + static const struct { + /* x is a point in the support of the LogLogistic distribution */ + double x; + /* 'p' is the probability that a random variable X for a given LogLogistic + * probability ditribution will take value less-or-equal to x */ + double p; + /* 'np' is the probability that a random variable X for a given LogLogistic + * probability distribution will take value greater-or-equal to x. */ + double np; + } cases[] = { + { 0, 0, 1 }, + { 1e-300, 1e-300, 1 }, + { 1e-17, 1e-17, 1 }, + { 1e-15, 1e-15, .999999999999999 }, + { .1, .09090909090909091, .90909090909090909 }, + { .25, .2, .8 }, + { .5, .33333333333333333, .66666666666666667 }, + { .75, .42857142857142855, .5714285714285714 }, + { .9999, .49997499874993756, .5000250012500626 }, + { .99999999, .49999999749999996, .5000000025 }, + { .999999999999999, .49999999999999994, .5000000000000002 }, + { 1, .5, .5 }, + }; + double relerr_bound = 3e-15; + size_t i; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double x = cases[i].x; + double p = cases[i].p; + double np = cases[i].np; + + CHECK_RELERR(p, cdf_log_logistic(x, 1, 1)); + CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1)); + CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2)); + CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2)); + if (2*sqrt(DBL_MIN) < x) { + CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5)); + CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5)); + CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5)); + } + + CHECK_RELERR(np, sf_log_logistic(x, 1, 1)); + CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1)); + CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2)); + CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2)); + if (2*sqrt(DBL_MIN) < x) { + CHECK_RELERR(np, 
sf_log_logistic(x*x, 1, .5)); + CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5)); + CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5)); + } + + CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1)); + CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1)); + CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2)); + CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2)); + if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) { + CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5)); + CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5)); + CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5)); + } + + CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1)); + CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1)); + CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1)); + CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2)); + CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2)); + CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2)); + if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) { + CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5)); + CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5)); + CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5)); + } + + CHECK_RELERR(x, icdf_log_logistic(p, 1, 1)); + CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1)); + CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1)); + CHECK_RELERR(x, icdf_log_logistic(p, 1, 1)); + CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2)); + CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2)); + CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2)); + CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5)); + CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5)); + + if (np < .9) { + CHECK_RELERR(x, isf_log_logistic(np, 1, 1)); + CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1)); + CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1)); + CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2)); + CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2)); + CHECK_RELERR(sqrt(x)*2, 
isf_log_logistic(np, 2, 2)); + CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5)); + CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5)); + CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5)); + + CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1)); + CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1)); + CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1)); + CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2)); + CHECK_RELERR(1/(2*sqrt(x)), + icdf_log_logistic(np, .5, 2)); + CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2)); + CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5)); + CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5)); + CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5)); + } + + CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1)); + CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1)); + CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1)); + CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2)); + CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2)); + CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2)); + CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5)); + CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5)); + CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5)); + } + + for (i = 0; i <= 100; i++) { + double p0 = (double)i/100; + + CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0)); + CHECK_RELERR((1 - 0.5*p0)/(0.5*p0), + sample_log_logistic(1, p0)); + } + + if (!ok) + printf("fail log logistic cdf/sf\n"); + + tt_assert(ok); + + done: + ; +} + +/** + * Test the cdf, sf, icdf, isf of the Weibull distribution. + */ +static void +test_weibull(void *arg) +{ + (void) arg; + + static const struct { + /* x is a point in the support of the Weibull distribution */ + double x; + /* 'p' is the probability that a random variable X for a given Weibull + * probability ditribution will take value less-or-equal to x */ + double p; + /* 'np' is the probability that a random variable X for a given Weibull + * probability distribution will take value greater-or-equal to x. 
*/ + double np; + } cases[] = { + { 0, 0, 1 }, + { 1e-300, 1e-300, 1 }, + { 1e-17, 1e-17, 1 }, + { .1, .09516258196404043, .9048374180359595 }, + { .5, .3934693402873666, .6065306597126334 }, + { .6931471805599453, .5, .5 }, + { 1, .6321205588285577, .36787944117144233 }, + { 10, .9999546000702375, 4.5399929762484854e-5 }, + { 36, .9999999999999998, 2.319522830243569e-16 }, + { 37, .9999999999999999, 8.533047625744066e-17 }, + { 38, 1, 3.1391327920480296e-17 }, + { 100, 1, 3.720075976020836e-44 }, + { 708, 1, 3.307553003638408e-308 }, + { 710, 1, 4.47628622567513e-309 }, + { 1000, 1, 0 }, + { HUGE_VAL, 1, 0 }, + }; + double relerr_bound = 3e-15; + size_t i; + bool ok = true; + + for (i = 0; i < arraycount(cases); i++) { + double x = cases[i].x; + double p = cases[i].p; + double np = cases[i].np; + + CHECK_RELERR(p, cdf_weibull(x, 1, 1)); + CHECK_RELERR(p, cdf_weibull(x/2, .5, 1)); + CHECK_RELERR(p, cdf_weibull(x*2, 2, 1)); + /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits. */ + if (x <= 0 || + sqrt(DBL_MIN) <= x) { + CHECK_RELERR(p, cdf_weibull(x*x, 1, .5)); + CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5)); + CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5)); + } + CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2)); + CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2)); + CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2)); + CHECK_RELERR(np, sf_weibull(x, 1, 1)); + CHECK_RELERR(np, sf_weibull(x/2, .5, 1)); + CHECK_RELERR(np, sf_weibull(x*2, 2, 1)); + CHECK_RELERR(np, sf_weibull(x*x, 1, .5)); + CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5)); + CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5)); + if (x >= 10) { + /* + * exp amplifies the error of sqrt(x)^2 + * proportionally to exp(x); for large inputs + * this is significant. 
+ */ + double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON)); + relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON; + if (relerr_bound < 3e-15) + /* + * The tests are written only to 16 + * decimal places anyway even if your + * `double' is, say, i387 binary80, for + * whatever reason. + */ + relerr_bound = 3e-15; + CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2)); + CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2)); + CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2)); + } + + if (p <= 0.75) { + /* + * For p near 1, not enough precision near 1 to + * recover x. + */ + CHECK_RELERR(x, icdf_weibull(p, 1, 1)); + CHECK_RELERR(x/2, icdf_weibull(p, .5, 1)); + CHECK_RELERR(x*2, icdf_weibull(p, 2, 1)); + } + if (p >= 0.25 && !tor_isinf(x) && np > 0) { + /* + * For p near 0, not enough precision in np + * near 1 to recover x. For 0, isf gives inf, + * even if p is precise enough for the icdf to + * work. + */ + CHECK_RELERR(x, isf_weibull(np, 1, 1)); + CHECK_RELERR(x/2, isf_weibull(np, .5, 1)); + CHECK_RELERR(x*2, isf_weibull(np, 2, 1)); + } + } + + for (i = 0; i <= 100; i++) { + double p0 = (double)i/100; + + CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2)); + CHECK_RELERR(3*sqrt(-log1p(-p0/2)), + sample_weibull(1, p0, 3, 2)); + } + + if (!ok) + printf("fail Weibull cdf/sf\n"); + + tt_assert(ok); + + done: + ; +} + +/** + * Test the cdf, sf, icdf, and isf of the generalized Pareto + * distribution. 
+ */
+static void
+test_genpareto(void *arg)
+{
+  (void) arg;
+
+  struct {
+    /* xi is the 'xi' parameter of the generalized Pareto distribution, and the
+     * rest are the same as in the above tests */
+    double xi, x, p, np;
+  } cases[] = {
+    { 0, 0, 0, 1 },
+    { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-300, .1, .09516258196404043, .9048374180359595 },
+    { 1e-300, 1, .6321205588285577, .36787944117144233 },
+    { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 },
+    { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 },
+    { 1e-16, 1e-200, 9.999999999999998e-201, 1 },
+    { 1e-16, 1e-16, 1e-16, 1 },
+    { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-16, .1, .09516258196404043, .9048374180359595 },
+    { 1e-16, 1, .6321205588285577, .36787944117144233 },
+    { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 },
+    { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 },
+    { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 },
+    { 1, 1e-300, 1e-300, 1 },
+    { 1, 1e-16, 1e-16, .9999999999999999 },
+    { 1, .1, .09090909090909091, .9090909090909091 },
+    { 1, 1, .5, .5 },
+    { 1, 10, .9090909090909091, .0909090909090909 },
+    { 1, 100, .9900990099009901, .0099009900990099 },
+    { 1, 1000, .999000999000999, 9.990009990009992e-4 },
+    { 10, 1e-300, 1e-300, 1 },
+    { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 },
+    { 10, .1, .06696700846319258, .9330329915368074 },
+    { 10, 1, .21320655780322778, .7867934421967723 },
+    { 10, 10, .3696701667040189, .6303298332959811 },
+    { 10, 100, .49886285755007337, .5011371424499267 },
+    { 10, 1000, .6018968102992647, .3981031897007353 },
+  };
+  /* xi values used for the sampler checks in the second loop. */
+  double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 };
+  size_t i, j;
+  /* relerr_bound, i and ok are read or written by CHECK_RELERR. */
+  double relerr_bound = 3e-15;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double xi = cases[i].xi;
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    /* cdf and sf with the third argument equal to 1, doubled and halved;
+     * x is scaled to match. */
+    CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(p, cdf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi));
+    CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi));
+
+    /* Only invert through whichever of p and np is below 1/2. */
+    if (p < .5) {
+      CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi));
+      CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi));
+      CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi));
+    }
+    if (np < .5) {
+      CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi));
+      CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi));
+      CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi));
+    }
+  }
+
+  /* Here i indexes xi_array, so the "case" number that CHECK_RELERR logs
+   * is the xi index; j is not part of the log message. */
+  for (i = 0; i < arraycount(xi_array); i++) {
+    for (j = 0; j <= 100; j++) {
+      /* j == 0 uses 2*DBL_MIN in place of 0. */
+      double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100);
+
+      /* This is actually a check against 0, but we do <= so that the compiler
+         does not raise a -Wfloat-equal */
+      if (fabs(xi_array[i]) <= 0) {
+        /*
+         * When xi == 0, the generalized Pareto
+         * distribution reduces to an
+         * exponential distribution.
+         */
+        CHECK_RELERR(-log(p0/2),
+                     sample_genpareto(0, p0, 0));
+        CHECK_RELERR(-log1p(-p0/2),
+                     sample_genpareto(1, p0, 0));
+      } else {
+        CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i],
+                     sample_genpareto(0, p0, xi_array[i]));
+        CHECK_RELERR((j == 0 ? DBL_MIN :
+                      expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]),
+                     sample_genpareto(1, p0, xi_array[i]));
+      }
+
+      /* The sampler must agree with isf/icdf evaluated at p0/2. */
+      CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]),
+                   sample_genpareto(0, p0, xi_array[i]));
+      CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]),
+                   sample_genpareto(1, p0, xi_array[i]));
+    }
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the deterministic sampler for uniform distribution on [a, b].
+ *
+ * This currently only tests whether the outcome lies within [a, b].
+ */
+static void
+test_uniform_interval(void *arg)
+{
+  (void) arg;
+  struct {
+    /* Sample from a uniform distribution with parameters 'a' and 'b', using
+     * 't' as the sampling index. */
+    double t, a, b;
+  } cases[] = {
+    { 0, 0, 0 },
+    { 0, 0, 1 },
+    { 0, 1.0000000000000007, 3.999999999999995 },
+    { 0, 4000, 4000 },
+    { 0.42475836677491291, 4000, 4000 },
+    { 0, -DBL_MAX, DBL_MAX },
+    { 0.25, -DBL_MAX, DBL_MAX },
+    { 0.5, -DBL_MAX, DBL_MAX },
+  };
+  /* i and ok are read or written by CHECK_LE. */
+  size_t i = 0;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double t = cases[i].t;
+    double a = cases[i].a;
+    double b = cases[i].b;
+
+    /* Bounds must hold for t and for 1 - t ... */
+    CHECK_LE(a, sample_uniform_interval(t, a, b));
+    CHECK_LE(sample_uniform_interval(t, a, b), b);
+
+    CHECK_LE(a, sample_uniform_interval(1 - t, a, b));
+    CHECK_LE(sample_uniform_interval(1 - t, a, b), b);
+
+    /* ... and likewise on the mirrored interval [-b, -a]. */
+    CHECK_LE(sample_uniform_interval(t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(t, -b, -a));
+
+    CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a));
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/********************** Stochastic tests ****************************/
+
+/*
+ * Psi test, sometimes also called G-test.  The psi test statistic,
+ * suitably scaled, has chi^2 distribution, but the psi test tends to
+ * have better statistical power in practice to detect deviations than
+ * the chi^2 test does.  (The chi^2 test statistic is the first term of
+ * the Taylor expansion of the psi test statistic.)  The psi test is
+ * generic, for any CDF; particular distributions might have higher-
+ * power tests to distinguish them from predictable deviations or bugs.
+ *
+ * We choose the psi critical value so that a single psi test has
+ * probability below alpha = 1% of spuriously failing even if all the
+ * code is correct.  But the false positive rate for a suite of n tests
+ * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n.  For n = 10,
+ * this is about 10%, and for n = 100 it is well over 50%.
+ *
+ * We can drive it down by running each test twice, and accepting it if
+ * it passes at least once; in that case, it is as if we used Binom(2;
+ * 2, alpha) = alpha^2 as the false positive rate for each test, and
+ * for n = 10 tests, it would be 0.1%, and for n = 100 tests, still
+ * only 1%.
+ *
+ * The critical value for a chi^2 distribution with 100 degrees of
+ * freedom and false positive rate alpha = 1% was taken from:
+ *
+ *      NIST/SEMATECH e-Handbook of Statistical Methods, Section
+ *      1.3.6.7.4 `Critical Values of the Chi-Square Distribution',
+ *      <https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
+ *      retrieved 2018-10-28.
+ */
+
+/* Number of samples drawn for each psi test. */
+static const size_t NSAMPLES = 100000;
+/* Number of chances we give to the test to succeed. */
+static const unsigned NTRIALS = 2;
+/* Number of times we want the test to pass per NTRIALS. */
+static const unsigned NPASSES_MIN = 1;
+
+#define PSI_DF 100 /* degrees of freedom */
+static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */
+
+/**
+ * Perform a psi test on an array of sample counts, C, adding up to N
+ * samples, and an array of log expected probabilities, logP,
+ * representing the null hypothesis for the distribution of samples
+ * counted.  Return false if the psi test rejects the null hypothesis,
+ * true if otherwise.
+ *
+ * The statistic is 2 * sum_i C[i]*(log(C[i]/N) - logP[i]), accumulated
+ * with Kahan compensated summation and compared against PSI_CRITICAL.
+ */
+static bool
+psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N)
+{
+  double psi = 0;
+  double c = 0;                 /* Kahan compensation */
+  double t, u;
+  size_t i;
+
+  for (i = 0; i < PSI_DF; i++) {
+    /*
+     * c*log(c/(n*p)) = n * f*log(f/p) where f = c/n is
+     * the frequency, and f*log(f/p) ---> 0 as f ---> 0, so
+     * this is a reasonable choice.  Further, any mass that
+     * _fails_ to turn up in this bin will inflate another
+     * bin instead, so we don't really lose anything by
+     * ignoring empty bins even if they have high
+     * probability.
+     */
+    if (C[i] == 0)
+      continue;
+    /* Kahan step: t is the new term minus the carried compensation, u
+     * the updated sum, and c the rounding error of that addition.  The
+     * statement order matters. */
+    t = C[i]*(log((double)C[i]/N) - logP[i]) - c;
+    u = psi + t;
+    c = (u - psi) - t;
+    psi = u;
+  }
+  psi *= 2;
+
+  return psi <= PSI_CRITICAL;
+}
+
+/**
+ * Psi-test the geometric sampler for success probability <b>p</b>.
+ *
+ * Bin i (0-based) counts samples equal to i + 1, and the last bin absorbs
+ * every sample >= PSI_DF.  Up to NTRIALS batches of NSAMPLES samples are
+ * drawn; return true as soon as NPASSES_MIN batches pass psi_test(),
+ * otherwise print a failure line and return false.
+ */
+static bool
+test_stochastic_geometric_impl(double p)
+{
+  double logP[PSI_DF] = {0};
+  unsigned ntry = NTRIALS, npass = 0;
+  unsigned i;
+  size_t j;
+
+  /* Compute logP[i] = Geom(i + 1; p).  */
+  for (i = 0; i < PSI_DF - 1; i++)
+    logP[i] = logpmf_geometric(i + 1, p);
+
+  /* Compute logP[n-1] = log (1 - (P[0] + P[1] + ... + P[n-2])).  */
+  logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1));
+
+  /* "ntry --> 0" is "ntry-- > 0": loop at most NTRIALS times. */
+  while (ntry --> 0) {
+    size_t C[PSI_DF] = {0};
+
+    for (j = 0; j < NSAMPLES; j++) {
+      double n_tmp = ceil(geometric_sample(p));
+      /* NOTE(review): assumes the sample is >= 1 and fits in unsigned;
+       * n == 0 would make C[n - 1] index out of bounds -- confirm
+       * against geometric_sample(). */
+      unsigned n = (unsigned) n_tmp;
+
+      /* Clamp the tail into the last bin. */
+      if (n > PSI_DF)
+        n = PSI_DF;
+      C[n - 1]++;
+    }
+
+    if (psi_test(C, logP, NSAMPLES)) {
+      if (++npass >= NPASSES_MIN)
+        break;
+    }
+  }
+
+  if (npass >= NPASSES_MIN) {
+    /* printf("pass %s sampler\n", "geometric"); */
+    return true;
+  } else {
+    printf("fail %s sampler\n", "geometric");
+    return false;
+  }
+}
+
+/**
+ * Divide the support of dist into histogram bins in logP. Start
+ * at the 1st percentile and ending at the 99th percentile. Pick the bin
+ * boundaries using linear interpolation so that they are uniformly spaced.
+ *
+ * In each bin logP[i] we insert the expected log-probability that a sampled
+ * value will fall into that bin. We will use this as the null hypothesis of
+ * the psi test.
+ *
+ * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_n =
+ * +inf, and x_i = lo + i*(hi - lo)/(n - 2).
+ */
+static void
+bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n)
+{
+#define CDF(x) dist->ops->cdf(dist, x)
+#define SF(x) dist->ops->sf(dist, x)
+  /* Width of each interior bin. */
+  const double w = (hi - lo)/(n - 2);
+  /* The median: bins below it are computed from the CDF, bins above it
+   * from the SF. */
+  double halfway = dist->ops->icdf(dist, 0.5);
+  double x_0, x_1;
+  size_t i;
+  /* Number of bins filled from the low end using the CDF. */
+  size_t n2 = ceil_to_size_t((halfway - lo)/w);
+
+  tor_assert(lo <= halfway);
+  tor_assert(halfway <= hi);
+  tor_assert(n2 <= n);
+
+  x_1 = lo;
+  logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */
+  for (i = 1; i < n2; i++) {
+    x_0 = x_1;
+    /* do the linear interpolation */
+    x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[i] = log(CDF(x_1) - CDF(x_0));
+  }
+  x_0 = hi;
+  logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */
+
+  /* In this loop we are filling out the high part of the array. We are using
+   * SF because in these cases the CDF is near 1 where precision is lower. So
+   * instead we are using SF near 0 where the precision is higher. We have
+   * SF(t) = 1 - CDF(t). */
+  for (i = 1; i < n - n2; i++) {
+    x_1 = x_0;
+    /* do the linear interpolation */
+    x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[n - i - 1] = log(SF(x_0) - SF(x_1));
+  }
+#undef SF
+#undef CDF
+}
+
+/**
+ * Draw NSAMPLES samples from dist, counting the number of samples x in
+ * the ith bin C[i] if x_{i-1} <= x < x_i, where x_-1 = -inf, x_n =
+ * +inf, and x_i = lo + i*(hi - lo)/(n - 2).
+ *
+ * Counts are added to whatever C already holds; the callers in this file
+ * pass a zero-initialized array.
+ */
+static void
+bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n)
+{
+  /* Width of each interior bin; the same formula as in bin_cdfs(). */
+  const double w = (hi - lo)/(n - 2);
+  size_t i;
+
+  for (i = 0; i < NSAMPLES; i++) {
+    double x = dist->ops->sample(dist);
+    size_t bin;
+
+    /* Bin 0 is the lower tail, bin n - 1 the upper tail, and bins
+     * 1..n-2 split [lo, hi) evenly. */
+    if (x < lo)
+      bin = 0;
+    else if (x < hi)
+      bin = 1 + floor_to_size_t((x - lo)/w);
+    else
+      bin = n - 1;
+    tor_assert(bin < n);
+    C[bin]++;
+  }
+}
+
+/**
+ * Carry out a Psi test on dist.
+ * + * Sample NSAMPLES from dist, putting them in bins from -inf to lo to + * hi to +inf, and apply up to two psi tests. True if at least one psi + * test passes; false if not. False positive rate should be bounded by + * 0.01^2 = 0.0001. + */ +static bool +test_psi_dist_sample(const struct dist *dist) +{ + double logP[PSI_DF] = {0}; + unsigned ntry = NTRIALS, npass = 0; + double lo = dist->ops->icdf(dist, 1/(double)(PSI_DF + 2)); + double hi = dist->ops->isf(dist, 1/(double)(PSI_DF + 2)); + + /* Create the null hypothesis in logP */ + bin_cdfs(dist, lo, hi, logP, PSI_DF); + + /* Now run the test */ + while (ntry --> 0) { + size_t C[PSI_DF] = {0}; + bin_samples(dist, lo, hi, C, PSI_DF); + if (psi_test(C, logP, NSAMPLES)) { + if (++npass >= NPASSES_MIN) + break; + } + } + + /* Did we fail or succeed? */ + if (npass >= NPASSES_MIN) { + /* printf("pass %s sampler\n", dist->ops->name);*/ + return true; + } else { + printf("fail %s sampler\n", dist->ops->name); + return false; + } +} + +/* This is the seed of the deterministic randomness */ +static uint32_t deterministic_rand_counter; + +/** Initialize the seed of the deterministic randomness. */ +static void +init_deterministic_rand(void) +{ + deterministic_rand_counter = crypto_rand_uint32(); +} + +/** Produce deterministic randomness for the stochastic tests using the global + * deterministic_rand_counter seed + * + * This function produces deterministic data over multiple calls iff it's + * called in the same call order with the same 'n' parameter (which is the + * case for the psi test). If not, outputs will deviate. 
*/ +static void +crypto_rand_deterministic(char *out, size_t n) +{ + /* Use a XOF to squeeze bytes out of that silly counter */ + crypto_xof_t *xof = crypto_xof_new(); + tor_assert(xof); + crypto_xof_add_bytes(xof, (uint8_t*)&deterministic_rand_counter, + sizeof(deterministic_rand_counter)); + crypto_xof_squeeze_bytes(xof, (uint8_t*)out, n); + crypto_xof_free(xof); + + /* Increase counter for next run */ + deterministic_rand_counter++; +} + +static void +test_stochastic_uniform(void *arg) +{ + (void) arg; + + const struct uniform uniform01 = { + .base = DIST_BASE(&uniform_ops), + .a = 0, + .b = 1, + }; + const struct uniform uniform_pos = { + .base = DIST_BASE(&uniform_ops), + .a = 1.23, + .b = 4.56, + }; + const struct uniform uniform_neg = { + .base = DIST_BASE(&uniform_ops), + .a = -10, + .b = -1, + }; + const struct uniform uniform_cross = { + .base = DIST_BASE(&uniform_ops), + .a = -1.23, + .b = 4.56, + }; + const struct uniform uniform_subnormal = { + .base = DIST_BASE(&uniform_ops), + .a = 4e-324, + .b = 4e-310, + }; + const struct uniform uniform_subnormal_cross = { + .base = DIST_BASE(&uniform_ops), + .a = -4e-324, + .b = 4e-310, + }; + bool ok = true; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok &= test_psi_dist_sample(&uniform01.base); + ok &= test_psi_dist_sample(&uniform_pos.base); + ok &= test_psi_dist_sample(&uniform_neg.base); + ok &= test_psi_dist_sample(&uniform_cross.base); + ok &= test_psi_dist_sample(&uniform_subnormal.base); + ok &= test_psi_dist_sample(&uniform_subnormal_cross.base); + + tt_assert(ok); + + done: + ; +} + +static bool +test_stochastic_logistic_impl(double mu, double sigma) +{ + const struct logistic dist = { + .base = DIST_BASE(&logistic_ops), + .mu = mu, + .sigma = sigma, + }; + + /* XXX Consider some fancier logistic test. 
*/ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_log_logistic_impl(double alpha, double beta) +{ + const struct log_logistic dist = { + .base = DIST_BASE(&log_logistic_ops), + .alpha = alpha, + .beta = beta, + }; + + /* XXX Consider some fancier log logistic test. */ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_weibull_impl(double lambda, double k) +{ + const struct weibull dist = { + .base = DIST_BASE(&weibull_ops), + .lambda = lambda, + .k = k, + }; + +/* + * XXX Consider applying a Tiku-Singh test: + * + * M.L. Tiku and M. Singh, `Testing the two-parameter + * Weibull distribution', Communications in Statistics -- + * Theory and Methods A10(9), 1981, 907--918. + *https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true + */ + return test_psi_dist_sample(&dist.base); +} + +static bool +test_stochastic_genpareto_impl(double mu, double sigma, double xi) +{ + const struct genpareto dist = { + .base = DIST_BASE(&genpareto_ops), + .mu = mu, + .sigma = sigma, + .xi = xi, + }; + + /* XXX Consider some fancier GPD test. 
*/ + return test_psi_dist_sample(&dist.base); +} + +static void +test_stochastic_genpareto(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_genpareto_impl(0, 1, -0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, -1e-30); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 0); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 1e-30); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(0, 1, 0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(-1, 1, -0.25); + tt_assert(ok); + ok = test_stochastic_genpareto_impl(1, 2, 0.25); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_geometric(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_geometric_impl(0.1); + tt_assert(ok); + ok = test_stochastic_geometric_impl(0.5); + tt_assert(ok); + ok = test_stochastic_geometric_impl(0.9); + tt_assert(ok); + ok = test_stochastic_geometric_impl(1); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_logistic(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_logistic_impl(0, 1); + tt_assert(ok); + ok = test_stochastic_logistic_impl(0, 1e-16); + tt_assert(ok); + ok = test_stochastic_logistic_impl(1, 10); + tt_assert(ok); + ok = test_stochastic_logistic_impl(-10, 100); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, 
deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_log_logistic(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_log_logistic_impl(1, 1); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(1, 10); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(M_E, 1e-1); + tt_assert(ok); + ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +static void +test_stochastic_weibull(void *arg) +{ + bool ok = 0; + bool tests_failed = true; + (void) arg; + + init_deterministic_rand(); + MOCK(crypto_rand, crypto_rand_deterministic); + + ok = test_stochastic_weibull_impl(1, 0.5); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 1); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 1.5); + tt_assert(ok); + ok = test_stochastic_weibull_impl(1, 2); + tt_assert(ok); + ok = test_stochastic_weibull_impl(10, 1); + tt_assert(ok); + + tests_failed = false; + + done: + if (tests_failed) { + printf("seed: %"PRIu32, deterministic_rand_counter); + } + UNMOCK(crypto_rand); +} + +struct testcase_t prob_distr_tests[] = { + { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL }, + { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL }, + { "weibull", test_weibull, TT_FORK, NULL, NULL }, + { "genpareto", test_genpareto, TT_FORK, NULL, NULL }, + { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL }, + END_OF_TESTCASES +}; + +struct testcase_t slow_stochastic_prob_distr_tests[] = { + { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL }, + { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL }, + { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL }, + { 
"stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL }, + { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL, + NULL }, + { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL }, + END_OF_TESTCASES +}; diff --git a/src/test/test_slow.c b/src/test/test_slow.c index 97c2912af6..39a203c726 100644 --- a/src/test/test_slow.c +++ b/src/test/test_slow.c @@ -21,6 +21,7 @@ struct testgroup_t testgroups[] = { { "slow/crypto/", slow_crypto_tests }, { "slow/process/", slow_process_tests }, + { "slow/prob_distr/", slow_stochastic_prob_distr_tests }, END_OF_GROUPS }; -- cgit v1.2.3-54-g00ecf From dd04917851c23f16a11c0d2b0c62ea76c93c4c7d Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 27 Nov 2018 01:54:10 +0200 Subject: Use the new probability distribution code in WTF-PAD. Co-authored-by: Mike Perry Co-authored-by: Taylor R Campbell --- src/core/or/circuitpadding.c | 101 ++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index a9d927619d..5210265ff2 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -1,8 +1,11 @@ /* Copyright (c) 2017 The Tor Project, Inc. */ /* See LICENSE for licensing information */ +#define CIRCUITPADDING_PRIVATE + #include #include "lib/math/fp.h" +#include "lib/math/prob_distr.h" #include "core/or/or.h" #include "core/or/circuitpadding.h" #include "core/or/circuitlist.h" @@ -374,7 +377,7 @@ circpad_distribution_sample_iat_delay(const circpad_state_t *state, * of tokens in each bin, and then a time value is chosen uniformly from * that bin's [start,end) time range. 
*/ -static circpad_delay_t +STATIC circpad_delay_t circpad_machine_sample_delay(circpad_machineinfo_t *mi) { const circpad_state_t *state = circpad_machine_current_state(mi); @@ -487,57 +490,75 @@ circpad_machine_sample_delay(circpad_machineinfo_t *mi) static double circpad_distribution_sample(circpad_distribution_t dist) { - double p = 0; + log_fn(LOG_DEBUG,LD_CIRC, "Sampling delay with distribution %d", + dist.type); switch (dist.type) { case CIRCPAD_DIST_NONE: - return 0; + { + /* We should not get in here like this */ + tor_assert_nonfatal_unreached(); + return 0; + } case CIRCPAD_DIST_UNIFORM: - p = crypto_rand_double(); - // param2 is upper bound, param1 is lower - /* The subtraction is exact as long as param2 and param1 are less than - * 2**53. The multiplication is accurate as long as (param2 - param1) - * is less than 2**52. (And when they are large, the low bits aren't - * important.) The result covers the full range of outputs, as long as - * p has a resolution of 1/2**32 or greater. */ - p *= (dist.param2 - dist.param1); - p += dist.param1; - return p; + { + // param2 is upper bound, param1 is lower + const struct uniform my_uniform = { + .base = DIST_BASE(&uniform_ops), + .a = dist.param1, + .b = dist.param2, + }; + return uniform_sample(&my_uniform.base); + } case CIRCPAD_DIST_LOGISTIC: - p = crypto_rand_double(); - /* https://en.wikipedia.org/wiki/Logistic_distribution#Quantile_function - * param1 is Mu, param2 is s. */ - if (p <= 0.0) // Avoid log(0) - return 0; - return dist.param1 + dist.param2*tor_mathlog(p/(1.0-p)); + { + /* param1 is Mu, param2 is sigma.
*/ + const struct logistic my_logistic = { + .base = DIST_BASE(&logistic_ops), + .mu = dist.param1, + .sigma = dist.param2, + }; + return logistic_sample(&my_logistic.base); + } case CIRCPAD_DIST_LOG_LOGISTIC: - p = crypto_rand_double(); - /* https://en.wikipedia.org/wiki/Log-logistic_distribution#Quantiles - * param1 is Alpha, param2 is Beta */ - return dist.param1 * pow(p/(1.0-p), 1.0/dist.param2); + { + /* param1 is Alpha, param2 is 1.0/Beta */ + const struct log_logistic my_log_logistic = { + .base = DIST_BASE(&log_logistic_ops), + .alpha = dist.param1, + .beta = dist.param2, + }; + return log_logistic_sample(&my_log_logistic.base); + } case CIRCPAD_DIST_GEOMETRIC: { /* param1 is 'p' (success probability) */ return geometric_sample(dist.param1); } case CIRCPAD_DIST_WEIBULL: - p = crypto_rand_double(); - /* https://en.wikipedia.org/wiki/Weibull_distribution \ - * #Cumulative_distribution_function - * param1 is k, param2 is Lambda */ - return dist.param2*pow(-tor_mathlog(1.0-p), 1.0/dist.param1); + { + /* param1 is k, param2 is Lambda */ + const struct weibull my_weibull = { + .base = DIST_BASE(&weibull_ops), + .k = dist.param1, + .lambda = dist.param2, + }; + return weibull_sample(&my_weibull.base); + } case CIRCPAD_DIST_PARETO: - p = 1.0-crypto_rand_double(); // Pareto quantile needs (0,1] - - /* https://en.wikipedia.org/wiki/Generalized_Pareto_distribution \ - * #Generating_generalized_Pareto_random_variables - * param1 is Sigma, param2 is Xi - * Since it's piecewise, we must define it for 0 (or close to 0) */ - if (fabs(dist.param2) <= 1e-22) - return -dist.param1*tor_mathlog(p); - else - return dist.param1*(pow(p, -dist.param2) - 1.0)/dist.param2; + { + /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */ + const struct genpareto my_genpareto = { + .base = DIST_BASE(&genpareto_ops), + .mu = 0, + .sigma = dist.param1, + .xi = dist.param2, + }; + return genpareto_sample(&my_genpareto.base); + } } + + tor_assert_nonfatal_unreached(); return 0; }
@@ -1086,8 +1107,8 @@ circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) * Returns 1 if we decide to transition states (due to infinity bin), * 0 otherwise. */ -circpad_decision_t -circpad_machine_schedule_padding(circpad_machineinfo_t *mi) +MOCK_IMPL(circpad_decision_t, +circpad_machine_schedule_padding,(circpad_machineinfo_t *mi)) { circpad_delay_t in_usec = 0; struct timeval timeout; -- cgit v1.2.3-54-g00ecf From 926fc93be5b6afab1a604ecd7c79aa6e8ae8a676 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Fri, 7 Dec 2018 16:35:23 +0200 Subject: Concentrate all TOR_USEC_PER_SEC definitions in a single header file. Co-authored-by: Mike Perry --- src/core/or/circuitpadding.c | 5 +---- src/feature/hibernate/hibernate.c | 3 +-- src/lib/defs/include.am | 1 + src/lib/defs/time.h | 23 +++++++++++++++++++++++ src/lib/time/.may_include | 1 + src/lib/time/tvdiff.c | 3 +-- src/test/test_circuitpadding.c | 39 ++++++++++++++++++--------------------- src/test/test_util.c | 2 +- 8 files changed, 47 insertions(+), 30 deletions(-) create mode 100644 src/lib/defs/time.h diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 5210265ff2..9f8713f624 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -17,6 +17,7 @@ #include "core/or/channel.h" #include "lib/time/compat_time.h" +#include "lib/defs/time.h" #include "lib/crypt_ops/crypto_rand.h" #include "core/or/crypt_path_st.h" @@ -31,10 +32,6 @@ #include "app/config/config.h" -/* XXX: This is a dup of the constant in ./src/lib/time/tvdiff.c. - * Should/Do we have a header for time constants like this? 
*/ -#define TOR_USEC_PER_SEC (1000000) - static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t circ_purpose); static inline circpad_circuit_state_t circpad_circuit_state( diff --git a/src/feature/hibernate/hibernate.c b/src/feature/hibernate/hibernate.c index feeb3d92ef..f10a45f4ae 100644 --- a/src/feature/hibernate/hibernate.c +++ b/src/feature/hibernate/hibernate.c @@ -37,6 +37,7 @@ hibernating, phase 2: #include "core/or/connection_or.h" #include "feature/control/control.h" #include "lib/crypt_ops/crypto_rand.h" +#include "lib/defs/time.h" #include "feature/hibernate/hibernate.h" #include "core/mainloop/mainloop.h" #include "feature/relay/router.h" @@ -832,8 +833,6 @@ hibernate_soft_limit_reached(void) return get_accounting_bytes() >= soft_limit; } -#define TOR_USEC_PER_SEC (1000000) - /** Called when we get a SIGINT, or when bandwidth soft limit is * reached. Puts us into "loose hibernation": we don't accept new * connections, but we continue handling old ones. */ diff --git a/src/lib/defs/include.am b/src/lib/defs/include.am index 48ee7f29fc..6a7f9114ea 100644 --- a/src/lib/defs/include.am +++ b/src/lib/defs/include.am @@ -2,4 +2,5 @@ noinst_HEADERS += \ src/lib/defs/dh_sizes.h \ src/lib/defs/digest_sizes.h \ + src/lib/defs/time.h \ src/lib/defs/x25519_sizes.h diff --git a/src/lib/defs/time.h b/src/lib/defs/time.h new file mode 100644 index 0000000000..762b23feab --- /dev/null +++ b/src/lib/defs/time.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2001, Matej Pfajfar. + * Copyright (c) 2001-2004, Roger Dingledine. + * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. + * Copyright (c) 2007-2018, The Tor Project, Inc. */ +/* See LICENSE for licensing information */ + +#ifndef TOR_TIME_DEFS_H +#define TOR_TIME_DEFS_H + +/** + * \file time.h + * + * \brief Definitions for timing-related constants. 
+ **/ + +/** How many microseconds per second */ +#define TOR_USEC_PER_SEC (1000000) +/** How many nanoseconds per microsecond */ +#define TOR_NSEC_PER_USEC (1000) +/** How many nanoseconds per millisecond */ +#define TOR_NSEC_PER_MSEC (1000*1000) + +#endif diff --git a/src/lib/time/.may_include b/src/lib/time/.may_include index 40a18805ac..ae01431b60 100644 --- a/src/lib/time/.may_include +++ b/src/lib/time/.may_include @@ -7,6 +7,7 @@ lib/log/*.h lib/subsys/*.h lib/time/*.h lib/wallclock/*.h +lib/defs/time.h # For load_windows_system_lib. lib/fs/winlib.h \ No newline at end of file diff --git a/src/lib/time/tvdiff.c b/src/lib/time/tvdiff.c index bc8a1166e7..9dfb63c26f 100644 --- a/src/lib/time/tvdiff.c +++ b/src/lib/time/tvdiff.c @@ -11,6 +11,7 @@ #include "lib/time/tvdiff.h" #include "lib/cc/compat_compiler.h" +#include "lib/defs/time.h" #include "lib/log/log.h" #ifdef _WIN32 @@ -20,8 +21,6 @@ #include #endif -#define TOR_USEC_PER_SEC 1000000 - /** Return the difference between start->tv_sec and end->tv_sec. * Returns INT64_MAX on overflow and underflow.
*/ diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c index 78f93f7b24..5693c98e41 100644 --- a/src/test/test_circuitpadding.c +++ b/src/test/test_circuitpadding.c @@ -12,6 +12,7 @@ #include #include "lib/evloop/compat_libevent.h" #include "lib/time/compat_time.h" +#include "lib/defs/time.h" #include "core/or/relay.h" #include "core/or/circuitlist.h" #include "core/or/circuitbuild.h" @@ -32,10 +33,6 @@ extern smartlist_t *connection_array; -#define USEC_PER_SEC (1000000) -#define NSEC_PER_USEC (1000) -#define NSEC_PER_MSEC (1000*1000) - circid_t get_unique_circ_id_by_chan(channel_t *chan); void helper_create_basic_machine(void); static void helper_create_conditional_machines(void); @@ -69,7 +66,7 @@ static circpad_machine_t circ_client_machine; static void timers_advance_and_run(int64_t msec_update) { - curr_mocked_time += msec_update*NSEC_PER_MSEC; + curr_mocked_time += msec_update*TOR_NSEC_PER_MSEC; monotime_coarse_set_mock_time_nsec(curr_mocked_time); monotime_set_mock_time_nsec(curr_mocked_time); timers_run_pending(); @@ -304,9 +301,9 @@ test_circuitpadding_rtt(void *arg) monotime_init(); monotime_enable_test_mocking(); - monotime_set_mock_time_nsec(1*NSEC_PER_USEC); - monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); - curr_mocked_time = 1*NSEC_PER_USEC; + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; timers_initialize(); circpad_machines_init(); @@ -994,9 +991,9 @@ test_circuitpadding_tokens(void *arg) monotime_init(); monotime_enable_test_mocking(); - monotime_set_mock_time_nsec(1*NSEC_PER_USEC); - monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); - curr_mocked_time = 1*NSEC_PER_USEC; + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; timers_initialize(); @@ -1441,9 +1438,9 @@ test_circuitpadding_negotiation(void *arg) 
monotime_init(); monotime_enable_test_mocking(); - monotime_set_mock_time_nsec(1*NSEC_PER_USEC); - monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); - curr_mocked_time = 1*NSEC_PER_USEC; + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; timers_initialize(); circpad_machines_init(); @@ -1562,7 +1559,7 @@ simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, circpad_cell_event_nonpadding_received((circuit_t*)mid_relay); // Advance time a tiny bit so we can calculate an RTT - curr_mocked_time += 10 * NSEC_PER_MSEC; + curr_mocked_time += 10 * TOR_NSEC_PER_MSEC; monotime_coarse_set_mock_time_nsec(curr_mocked_time); monotime_set_mock_time_nsec(curr_mocked_time); @@ -1715,9 +1712,9 @@ test_circuitpadding_conditions(void *arg) monotime_init(); monotime_enable_test_mocking(); - monotime_set_mock_time_nsec(1*NSEC_PER_USEC); - monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); - curr_mocked_time = 1*NSEC_PER_USEC; + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; timers_initialize(); helper_create_conditional_machines(); @@ -1835,9 +1832,9 @@ test_circuitpadding_circuitsetup_machine(void *arg) monotime_init(); monotime_enable_test_mocking(); - monotime_set_mock_time_nsec(1*NSEC_PER_USEC); - monotime_coarse_set_mock_time_nsec(1*NSEC_PER_USEC); - curr_mocked_time = 1*NSEC_PER_USEC; + monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC); + curr_mocked_time = 1*TOR_NSEC_PER_USEC; timers_initialize(); circpad_machines_init(); diff --git a/src/test/test_util.c b/src/test/test_util.c index b983cbb0bf..bf64cff7ef 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -19,6 +19,7 @@ #include "feature/client/transports.h" #include "lib/crypt_ops/crypto_format.h" #include "lib/crypt_ops/crypto_rand.h" 
+#include "lib/defs/time.h" #include "test/test.h" #include "lib/memarea/memarea.h" #include "lib/process/waitpid.h" @@ -404,7 +405,6 @@ test_util_time(void *arg) /* Assume tv_usec is an unsigned integer until proven otherwise */ #define TV_USEC_MAX UINT_MAX -#define TOR_USEC_PER_SEC 1000000 /* Overflows in the result type */ -- cgit v1.2.3-54-g00ecf From 56a45eb4092e9c543b39e981dce798c64bbc9e5d Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 27 Dec 2018 10:45:16 +0200 Subject: Disable current padding machines. Co-authored-by: Mike Perry --- src/core/or/circuitpadding.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 9f8713f624..ee26558bc8 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -2018,6 +2018,8 @@ circpad_setup_machine_on_circ(circuit_t *on_circ, on_circ->padding_machine[machine->machine_index] = machine; } +/* These padding machines are only used for tests pending #28634. */ +#ifdef TOR_UNIT_TESTS static void circpad_circ_client_machine_init(void) { @@ -2163,6 +2165,7 @@ circpad_circ_responder_machine_init(void) circ_responder_machine->machine_num = smartlist_len(relay_padding_machines); smartlist_add(relay_padding_machines, circ_responder_machine); } +#endif /** * Initialize all of our padding machines. @@ -2180,9 +2183,10 @@ circpad_machines_init(void) relay_padding_machines = smartlist_new(); // TODO: Parse machines from consensus and torrc - +#ifdef TOR_UNIT_TESTS circpad_circ_client_machine_init(); circpad_circ_responder_machine_init(); +#endif } /** -- cgit v1.2.3-54-g00ecf From 0658c729cf3c6be27ff774d9d219402a6cf6cf53 Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Fri, 24 Aug 2018 21:37:43 +0000 Subject: Add TODO file for padding work. Note to self/others: don't merge this. 
Co-authored-by: George Kadianakis --- PADDING_TODO.txt | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 PADDING_TODO.txt diff --git a/PADDING_TODO.txt b/PADDING_TODO.txt new file mode 100644 index 0000000000..a2e97f7ca3 --- /dev/null +++ b/PADDING_TODO.txt @@ -0,0 +1,58 @@ +TODO sketch for this branch, in rough priority order: + +- Clean up/fix XXX's and FIXMEs + - Test event entry points into circuitpad? + - Most of our events come from completely untested code :/ + +- Compat-breaking changes to be decided/done ASAP + - Option to keep circuits open if machine present + - Specify an ordered preference list of padding machines + - Specify exit policy for machine conditions? + - short_policy_t looks good, except for its flexible array member :/ + - Can we make our own struct with a small, fixed number of policy + entries? Say 3-4? Or is that a bad idea to lose this flexibility? + - Check conditions based on attached streams on the circuit + - Accept should mean "only apply if matched" + - Reject should mean "don't apply if matched" + - If a policy is specified, Reject *:* is implicit default (so reject + policies need an Accept entry). + - With no policy, Accept *:* is implicit default. + + +- Misc fixes: + - Remove circuitsetup machine (but place it in unittests -- they depend on it) + - Circuit RTT measurement will break on var_cell/EXTEND2 cells + - Are there any heuristics we can use here? + - If RELAY_EARLY is only for the first cell of an EXTEND2 series, + we can use that. But the proposal currently says MAY, but not MUST + for this behavior. + +======== 0.3.6 ======== + +- Come up with some good histograms for eg circuit setup fingerprinting, + website fingerprinting, and vanguards usage. 
+ +- Vanguards compatibility for MiddleNodes (via changes to vanguards addon) + +- circpad_machine_validate() function to sanity-check histograms loaded from + consensus/torrc (can also be used to help guide a GA). + - Check bin construction + - no type overflow (start_usec + range_sec, etc) + - no conflicting state transitions (or overlap with cancel events) + - no use of both histograms and iat_dist + - at least two histogram bins + - min_hop vs target_hop + +- Support torrc load+serialization of state machines + - ?? + +- Support consensus load+serialization of state machines + - ?? + +- Prop #265 load balancing + +- Rephist timer stats + - Is this a privacy risk? The adversary could create lots of circuits + to find a layer2 vanguard.. Otherwise they will be spread across middles. + + -- cgit v1.2.3-54-g00ecf From cdbd5c0af85775522845a54468492770c54b4cb4 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 12:28:17 +0200 Subject: Add top-level file documentation for circuitpadding.c --- src/core/or/circuitpadding.c | 38 ++++++++++++++++++++++++++++++++++++++ src/core/or/circuitpadding.h | 3 ++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index ee26558bc8..4c0736fff4 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -1,6 +1,44 @@ /* Copyright (c) 2017 The Tor Project, Inc. */ /* See LICENSE for licensing information */ +/** + * \file circuitpadding.c + * \brief Circuit-level padding implementation + * + * \details + * + * This file implements Tor proposal 254 "Padding Negotiation" which is heavily + * inspired by the paper "Toward an Efficient Website Fingerprinting Defense" + * by M. Juarez, M. Imani, M. Perry, C. Diaz, M. Wright. + * + * In particular the code in this file describes mechanisms for clients to + * negotiate various types of circuit-level padding from relays. 
+ * + * Each padding type is described by a state machine (circpad_machine_t), which + * is also referred as a "padding machine" in this file. Currently, these + * state machines are hardcoded in the source code (e.g. see + * circpad_circ_client_machine_init()), but in the future we will be able to + * serialize them in the torrc or the consensus. + * + * As specified by prop#254, clients can negotiate padding with relays by using + * PADDING_NEGOTIATE cells. After successful padding negotiation, padding + * machines are assigned to the circuit in their mutable form as a + * circpad_machineinfo_t. + * + * Each state of a padding state machine can be either: + * - A histogram that specifies inter-arrival padding delays. + * - Or a parametrized probability distribution that specifies inter-arrival + * delays (see circpad_distribution_type_t). + * + * Padding machines start from the START state and finish with the END + * state. They can transition between states using the events in + * circpad_event_t. + * + * When a padding machine reaches the END state, it gets wiped from the circuit + * so that other padding machines can take over if needed (see + * circpad_machine_transitioned_to_end()). + **/ + #define CIRCUITPADDING_PRIVATE #include diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h index 4680c6be43..6559f916d2 100644 --- a/src/core/or/circuitpadding.h +++ b/src/core/or/circuitpadding.h @@ -6,10 +6,11 @@ * \file circuitpadding.h * \brief Header file for circuitpadding.c. 
**/ + #ifndef TOR_CIRCUITPADDING_H #define TOR_CIRCUITPADDING_H -#include "circpad_negotiation.h" +#include "src/trunnel/circpad_negotiation.h" #include "lib/evloop/timers.h" typedef struct circuit_t circuit_t; -- cgit v1.2.3-54-g00ecf From 5738a0ab6c7904ed9dd53ce5d045b2dedf69b4f6 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 12:33:43 +0200 Subject: Rename circpad_machine_t -> circpad_machine_spec_t --- src/core/or/circuit_st.h | 4 +-- src/core/or/circuitpadding.c | 70 +++++++++++++++++++++--------------------- src/core/or/circuitpadding.h | 14 ++++----- src/test/test_circuitpadding.c | 17 +++++----- 4 files changed, 53 insertions(+), 52 deletions(-) diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 0d9ad3cdd5..74f045d723 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -12,7 +12,7 @@ #include "core/or/cell_queue_st.h" struct hs_token_t; -typedef struct circpad_machine_t circpad_machine_t; +typedef struct circpad_machine_spec_t circpad_machine_spec_t; typedef struct circpad_machineinfo_t circpad_machineinfo_t; /** Number of padding state machines on a circuit. */ @@ -189,7 +189,7 @@ struct circuit_t { * * Each element of this array corresponds to a different padding machine, * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ - const circpad_machine_t *padding_machine[CIRCPAD_MAX_MACHINES]; + const circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES]; /** Adaptive Padding machine info for above machines. This is the * per-circuit mutable information, such as the current state and diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 4c0736fff4..c5a80d6c5d 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -14,9 +14,9 @@ * In particular the code in this file describes mechanisms for clients to * negotiate various types of circuit-level padding from relays. 
* - * Each padding type is described by a state machine (circpad_machine_t), which - * is also referred as a "padding machine" in this file. Currently, these - * state machines are hardcoded in the source code (e.g. see + * Each padding type is described by a state machine (circpad_machine_spec_t), + * which is also referred as a "padding machine" in this file. Currently, + * these state machines are hardcoded in the source code (e.g. see * circpad_circ_client_machine_init()), but in the future we will be able to * serialize them in the torrc or the consensus. * @@ -36,7 +36,7 @@ * * When a padding machine reaches the END state, it gets wiped from the circuit * so that other padding machines can take over if needed (see - * circpad_machine_transitioned_to_end()). + * circpad_machine_spec_transitioned_to_end()). **/ #define CIRCUITPADDING_PRIVATE @@ -75,7 +75,7 @@ static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t static inline circpad_circuit_state_t circpad_circuit_state( origin_circuit_t *circ); static void circpad_setup_machine_on_circ(circuit_t *on_circ, - const circpad_machine_t *machine); + const circpad_machine_spec_t *machine); static double circpad_distribution_sample(circpad_distribution_t dist); /** Cached consensus params */ @@ -86,12 +86,12 @@ static uint16_t circpad_global_allowed_cells; static uint64_t circpad_global_padding_sent; static uint64_t circpad_global_nonpadding_sent; -/** This is the list of circpad_machine_t's parsed from consensus and torrc - * that have origin_side == 1 (ie: are for client side) */ +/** This is the list of circpad_machine_spec_t's parsed from consensus and + * torrc that have origin_side == 1 (ie: are for client side) */ STATIC smartlist_t *origin_padding_machines = NULL; -/** This is the list of circpad_machine_t's parsed from consensus and torrc - * that have origin_side == 0 (ie: are for relay side) */ +/** This is the list of circpad_machine_spec_t's parsed from consensus and + * torrc that 
have origin_side == 0 (ie: are for relay side) */ STATIC smartlist_t *relay_padding_machines = NULL; /** Loop over the current padding state machines using loop_var as the @@ -197,7 +197,7 @@ circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) STATIC const circpad_state_t * circpad_machine_current_state(const circpad_machineinfo_t *mi) { - const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); if (mi->current_state == CIRCPAD_STATE_END) { return NULL; @@ -1097,7 +1097,7 @@ circpad_new_consensus_params(const networkstatus_t *ns) STATIC bool circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) { - const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); /* If machine_padding_pct is non-zero, and we've sent more * than the allowed count of padding cells, then check our @@ -1233,9 +1233,9 @@ circpad_machine_schedule_padding,(circpad_machineinfo_t *mi)) * not access it. */ static void -circpad_machine_transitioned_to_end(circpad_machineinfo_t *mi) +circpad_machine_spec_transitioned_to_end(circpad_machineinfo_t *mi) { - const circpad_machine_t *machine = CIRCPAD_GET_MACHINE(mi); + const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); /* * We allow machines to shut down and delete themselves as opposed @@ -1283,7 +1283,7 @@ circpad_machine_transitioned_to_end(circpad_machineinfo_t *mi) * Returns 1 if we transition states, 0 otherwise. 
*/ MOCK_IMPL(circpad_decision_t, -circpad_machine_transition,(circpad_machineinfo_t *mi, +circpad_machine_spec_transition,(circpad_machineinfo_t *mi, circpad_event_t event)) { const circpad_state_t *state = @@ -1331,7 +1331,7 @@ circpad_machine_transition,(circpad_machineinfo_t *mi, /* If we transition to the end state, check to see * if this machine wants to be shut down at end */ if (s == CIRCPAD_STATE_END) { - circpad_machine_transitioned_to_end(mi); + circpad_machine_spec_transitioned_to_end(mi); /* We transitioned but we don't pad in end. Also, mi * may be freed. Returning STATE_CHANGED prevents us * from accessing it in any callers of this function. */ @@ -1485,7 +1485,7 @@ circpad_cell_event_nonpadding_sent(circuit_t *on_circ) if (!circpad_machine_remove_token(on_circ->padding_info[i])) { /* If removing a token did not cause a transition, check if * non-padding sent event should */ - circpad_machine_transition(on_circ->padding_info[i], + circpad_machine_spec_transition(on_circ->padding_info[i], CIRCPAD_EVENT_NONPADDING_SENT); } } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; @@ -1506,7 +1506,7 @@ circpad_cell_event_nonpadding_received(circuit_t *on_circ) /* First, update any RTT estimate */ circpad_estimate_circ_rtt_on_received(on_circ, on_circ->padding_info[i]); - circpad_machine_transition(on_circ->padding_info[i], + circpad_machine_spec_transition(on_circ->padding_info[i], CIRCPAD_EVENT_NONPADDING_RECV); } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; } @@ -1523,7 +1523,7 @@ void circpad_cell_event_padding_sent(circuit_t *on_circ) { FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { - circpad_machine_transition(on_circ->padding_info[i], + circpad_machine_spec_transition(on_circ->padding_info[i], CIRCPAD_EVENT_PADDING_SENT); } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; } @@ -1541,7 +1541,7 @@ circpad_cell_event_padding_received(circuit_t *on_circ) { /* identical to padding sent */ FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) { - 
circpad_machine_transition(on_circ->padding_info[i], + circpad_machine_spec_transition(on_circ->padding_info[i], CIRCPAD_EVENT_PADDING_RECV); } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END; } @@ -1558,7 +1558,7 @@ circpad_cell_event_padding_received(circuit_t *on_circ) circpad_decision_t circpad_internal_event_infinity(circpad_machineinfo_t *mi) { - return circpad_machine_transition(mi, CIRCPAD_EVENT_INFINITY); + return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_INFINITY); } /** @@ -1572,7 +1572,7 @@ circpad_internal_event_infinity(circpad_machineinfo_t *mi) circpad_decision_t circpad_internal_event_bins_empty(circpad_machineinfo_t *mi) { - if (circpad_machine_transition(mi, CIRCPAD_EVENT_BINS_EMPTY) + if (circpad_machine_spec_transition(mi, CIRCPAD_EVENT_BINS_EMPTY) == CIRCPAD_STATE_CHANGED) { return CIRCPAD_STATE_CHANGED; } else { @@ -1591,7 +1591,7 @@ circpad_internal_event_bins_empty(circpad_machineinfo_t *mi) circpad_decision_t circpad_internal_event_state_length_up(circpad_machineinfo_t *mi) { - return circpad_machine_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT); + return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT); } /** @@ -1599,7 +1599,7 @@ circpad_internal_event_state_length_up(circpad_machineinfo_t *mi) */ static inline bool circpad_machine_conditions_met(origin_circuit_t *circ, - const circpad_machine_t *machine) + const circpad_machine_spec_t *machine) { if (!(circpad_circ_purpose_to_mask(TO_CIRCUIT(circ)->purpose) & machine->conditions.purpose_mask)) @@ -1740,7 +1740,7 @@ circpad_add_matching_machines(origin_circuit_t *on_circ) * machines in reverse order, so that more recently added * machines take priority over older ones. */ SMARTLIST_FOREACH_REVERSE_BEGIN(origin_padding_machines, - circpad_machine_t *, + circpad_machine_spec_t *, machine) { /* Machine definitions have a specific target machine index. 
* This is so event ordering is deterministic with respect @@ -2012,7 +2012,7 @@ circpad_deliver_sent_relay_cell_events(circuit_t *circ, * Initialize the states array for a circpad machine. */ void -circpad_machine_states_init(circpad_machine_t *machine, +circpad_machine_states_init(circpad_machine_spec_t *machine, circpad_statenum_t num_states) { if (BUG(num_states > CIRCPAD_MAX_MACHINE_STATES)) { @@ -2033,7 +2033,7 @@ circpad_machine_states_init(circpad_machine_t *machine, static void circpad_setup_machine_on_circ(circuit_t *on_circ, - const circpad_machine_t *machine) + const circpad_machine_spec_t *machine) { if (CIRCUIT_IS_ORIGIN(on_circ) && !machine->is_origin_side) { log_fn(LOG_WARN, LD_BUG, @@ -2061,8 +2061,8 @@ circpad_setup_machine_on_circ(circuit_t *on_circ, static void circpad_circ_client_machine_init(void) { - circpad_machine_t *circ_client_machine - = tor_malloc_zero(sizeof(circpad_machine_t)); + circpad_machine_spec_t *circ_client_machine + = tor_malloc_zero(sizeof(circpad_machine_spec_t)); // XXX: Better conditions for merge.. Or disable this machine in // merge? 
@@ -2115,8 +2115,8 @@ circpad_circ_client_machine_init(void) static void circpad_circ_responder_machine_init(void) { - circpad_machine_t *circ_responder_machine - = tor_malloc_zero(sizeof(circpad_machine_t)); + circpad_machine_spec_t *circ_responder_machine + = tor_malloc_zero(sizeof(circpad_machine_spec_t)); /* Shut down the machine after we've sent enough packets */ circ_responder_machine->should_negotiate_end = 1; @@ -2235,14 +2235,14 @@ circpad_machines_free(void) { if (origin_padding_machines) { SMARTLIST_FOREACH(origin_padding_machines, - circpad_machine_t *, + circpad_machine_spec_t *, m, tor_free(m->states); tor_free(m)); smartlist_free(origin_padding_machines); } if (relay_padding_machines) { SMARTLIST_FOREACH(relay_padding_machines, - circpad_machine_t *, + circpad_machine_spec_t *, m, tor_free(m->states); tor_free(m)); smartlist_free(relay_padding_machines); } @@ -2417,7 +2417,7 @@ circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell) goto err; } else if (negotiate->command == CIRCPAD_COMMAND_START) { SMARTLIST_FOREACH_BEGIN(relay_padding_machines, - const circpad_machine_t *, m) { + const circpad_machine_spec_t *, m) { if (m->machine_num == negotiate->machine_type) { circpad_setup_machine_on_circ(circ, m); goto done; @@ -2520,7 +2520,7 @@ circpad_state_serialize(const circpad_state_t *state, } char * -circpad_machine_to_string(const circpad_machine_t *machine) +circpad_machine_spec_to_string(const circpad_machine_spec_t *machine) { smartlist_t *chunks = smartlist_new(); char *out; @@ -2538,7 +2538,7 @@ circpad_machine_to_string(const circpad_machine_t *machine) } // XXX: Writeme -const circpad_machine_t * +const circpad_machine_spec_t * circpad_string_to_machine(const char *str) { (void)str; diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h index 6559f916d2..a43be58abb 100644 --- a/src/core/or/circuitpadding.h +++ b/src/core/or/circuitpadding.h @@ -389,7 +389,7 @@ typedef struct circpad_state_t { /** * End is a 
pseudo-state that causes the machine to go completely * idle, and optionally get torn down (depending on the - * value of circpad_machine_t.should_negotiate_end) + * value of circpad_machine_spec_t.should_negotiate_end) * * End MUST NOT occupy a slot in the machine state array. */ @@ -529,7 +529,7 @@ typedef struct circpad_machineinfo_t { typedef uint8_t circpad_machine_num_t; /** Global state machine structure from the consensus */ -typedef struct circpad_machine_t { +typedef struct circpad_machine_spec_t { /** Global machine number */ circpad_machine_num_t machine_num; @@ -569,7 +569,7 @@ typedef struct circpad_machine_t { * Number of states this machine has (ie: length of the states array). * XXX: This field is not needed other than for safety. */ circpad_statenum_t num_states; -} circpad_machine_t; +} circpad_machine_spec_t; void circpad_new_consensus_params(const networkstatus_t *ns); @@ -608,7 +608,7 @@ void circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ); void circpad_machines_init(void); void circpad_machines_free(void); -void circpad_machine_states_init(circpad_machine_t *machine, +void circpad_machine_states_init(circpad_machine_spec_t *machine, circpad_statenum_t num_states); void circpad_circuit_free_all_machineinfos(circuit_t *circ); @@ -617,8 +617,8 @@ bool circpad_padding_is_from_expected_hop(circuit_t *circ, crypt_path_t *from_hop); /** Serializaton functions for writing to/from torrc and consensus */ -char *circpad_machine_to_string(const circpad_machine_t *machine); -const circpad_machine_t *circpad_string_to_machine(const char *str); +char *circpad_machine_spec_to_string(const circpad_machine_spec_t *machine); +const circpad_machine_spec_t *circpad_string_to_machine(const char *str); /* Padding negotiation between client and middle */ signed_error_t circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell); @@ -637,7 +637,7 @@ MOCK_DECL(circpad_decision_t, circpad_machine_schedule_padding,(circpad_machineinfo_t *)); 
MOCK_DECL(circpad_decision_t, -circpad_machine_transition, (circpad_machineinfo_t *mi, +circpad_machine_spec_transition, (circpad_machineinfo_t *mi, circpad_event_t event)); circpad_decision_t circpad_send_padding_cell_for_callback( diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c index 5693c98e41..2ab8e2445e 100644 --- a/src/test/test_circuitpadding.c +++ b/src/test/test_circuitpadding.c @@ -61,7 +61,7 @@ static node_t padding_node; static node_t non_padding_node; static channel_t dummy_channel; -static circpad_machine_t circ_client_machine; +static circpad_machine_spec_t circ_client_machine; static void timers_advance_and_run(int64_t msec_update) @@ -1514,7 +1514,7 @@ test_circuitpadding_negotiation(void *arg) client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL; SMARTLIST_FOREACH(relay_padding_machines, - circpad_machine_t *, + circpad_machine_spec_t *, m, tor_free(m->states); tor_free(m)); smartlist_free(relay_padding_machines); relay_padding_machines = smartlist_new(); @@ -1594,10 +1594,11 @@ simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay, circpad_machine_event_circ_added_hop(TO_ORIGIN_CIRCUIT(client)); } -static circpad_machine_t * +static circpad_machine_spec_t * helper_create_conditional_machine(void) { - circpad_machine_t *ret = tor_malloc_zero(sizeof(circpad_machine_t)); + circpad_machine_spec_t *ret = + tor_malloc_zero(sizeof(circpad_machine_spec_t)); /* Start, burst */ circpad_machine_states_init(ret, 2); @@ -1630,7 +1631,7 @@ helper_create_conditional_machine(void) static void helper_create_conditional_machines(void) { - circpad_machine_t *add = helper_create_conditional_machine(); + circpad_machine_spec_t *add = helper_create_conditional_machine(); origin_padding_machines = smartlist_new(); relay_padding_machines = smartlist_new(); @@ -2158,7 +2159,7 @@ test_circuitpadding_sample_distribution(void *arg) } static circpad_decision_t -circpad_machine_transition_mock(circpad_machineinfo_t *mi, 
+circpad_machine_spec_transition_mock(circpad_machineinfo_t *mi, circpad_event_t event) { (void) mi; @@ -2178,7 +2179,7 @@ test_circuitpadding_machine_rate_limiting(void *arg) /* Ignore machine transitions for the purposes of this function, we only * really care about padding counts */ - MOCK(circpad_machine_transition, circpad_machine_transition_mock); + MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock); MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock); /* Setup machine and circuits */ @@ -2246,7 +2247,7 @@ test_circuitpadding_global_rate_limiting(void *arg) /* Ignore machine transitions for the purposes of this function, we only * really care about padding counts */ - MOCK(circpad_machine_transition, circpad_machine_transition_mock); + MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock); MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock); MOCK(circuit_package_relay_cell, circuit_package_relay_cell_mock); -- cgit v1.2.3-54-g00ecf From f4938179c50cc385b7599e5a03388792e46cde83 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 12:38:00 +0200 Subject: Rename circpad_machineinfo_t -> circpad_machine_state_t --- src/core/or/circuit_st.h | 4 +-- src/core/or/circuitpadding.c | 61 +++++++++++++++++++++--------------------- src/core/or/circuitpadding.h | 46 ++++++++++++++++--------------- src/test/test_circuitpadding.c | 22 +++++++-------- 4 files changed, 68 insertions(+), 65 deletions(-) diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 74f045d723..bfbd336c3a 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -13,7 +13,7 @@ struct hs_token_t; typedef struct circpad_machine_spec_t circpad_machine_spec_t; -typedef struct circpad_machineinfo_t circpad_machineinfo_t; +typedef struct circpad_machine_state_t circpad_machine_state_t; /** Number of padding state machines on a circuit. 
*/ #define CIRCPAD_MAX_MACHINES (2) @@ -200,7 +200,7 @@ struct circuit_t { * * Each element of this array corresponds to a different padding machine, * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ - circpad_machineinfo_t *padding_info[CIRCPAD_MAX_MACHINES]; + circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES]; }; #endif diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index c5a80d6c5d..6f10d3fb6a 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -23,7 +23,7 @@ * As specified by prop#254, clients can negotiate padding with relays by using * PADDING_NEGOTIATE cells. After successful padding negotiation, padding * machines are assigned to the circuit in their mutable form as a - * circpad_machineinfo_t. + * circpad_machine_state_t. * * Each state of a padding state machine can be either: * - A histogram that specifies inter-arrival padding delays. @@ -177,10 +177,11 @@ circpad_circuit_free_all_machineinfos(circuit_t *circ) /** * Allocate a new mutable machineinfo structure. */ -STATIC circpad_machineinfo_t * +STATIC circpad_machine_state_t * circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) { - circpad_machineinfo_t *mi = tor_malloc_zero(sizeof(circpad_machineinfo_t)); + circpad_machine_state_t *mi = + tor_malloc_zero(sizeof(circpad_machine_state_t)); mi->machine_index = machine_index; mi->on_circ = on_circ; @@ -195,7 +196,7 @@ circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index) * invalid state. */ STATIC const circpad_state_t * -circpad_machine_current_state(const circpad_machineinfo_t *mi) +circpad_machine_current_state(const circpad_machine_state_t *mi) { const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); @@ -227,7 +228,7 @@ circpad_machine_current_state(const circpad_machineinfo_t *mi) * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX). 
*/ STATIC circpad_delay_t -circpad_histogram_bin_to_usec(const circpad_machineinfo_t *mi, +circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi, circpad_hist_index_t bin) { const circpad_state_t *state = circpad_machine_current_state(mi); @@ -264,7 +265,7 @@ circpad_histogram_bin_to_usec(const circpad_machineinfo_t *mi, /** Return the midpoint of the histogram bin bin_index. */ static circpad_delay_t -circpad_get_histogram_bin_midpoint(const circpad_machineinfo_t *mi, +circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi, int bin_index) { circpad_delay_t left_bound = circpad_histogram_bin_to_usec(mi, bin_index); @@ -285,7 +286,7 @@ circpad_get_histogram_bin_midpoint(const circpad_machineinfo_t *mi, * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE]. */ STATIC circpad_hist_index_t -circpad_histogram_usec_to_bin(const circpad_machineinfo_t *mi, +circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi, circpad_delay_t usec) { const circpad_state_t *state = circpad_machine_current_state(mi); @@ -333,7 +334,7 @@ circpad_histogram_usec_to_bin(const circpad_machineinfo_t *mi, * Called after a state transition, or if the bins are empty. */ STATIC void -circpad_machine_setup_tokens(circpad_machineinfo_t *mi) +circpad_machine_setup_tokens(circpad_machine_state_t *mi) { const circpad_state_t *state = circpad_machine_current_state(mi); @@ -365,7 +366,7 @@ circpad_machine_setup_tokens(circpad_machineinfo_t *mi) * Choose a length for this state (in cells), if specified. */ static void -circpad_choose_state_length(circpad_machineinfo_t *mi) +circpad_choose_state_length(circpad_machine_state_t *mi) { const circpad_state_t *state = circpad_machine_current_state(mi); double length; @@ -413,7 +414,7 @@ circpad_distribution_sample_iat_delay(const circpad_state_t *state, * that bin's [start,end) time range. 
*/ STATIC circpad_delay_t -circpad_machine_sample_delay(circpad_machineinfo_t *mi) +circpad_machine_sample_delay(circpad_machine_state_t *mi) { const circpad_state_t *state = circpad_machine_current_state(mi); const circpad_hist_token_t *histogram = NULL; @@ -602,7 +603,7 @@ circpad_distribution_sample(circpad_distribution_t dist) * greater than the target, and that has tokens remaining. */ static circpad_hist_index_t -circpad_machine_first_higher_index(const circpad_machineinfo_t *mi, +circpad_machine_first_higher_index(const circpad_machine_state_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, @@ -624,7 +625,7 @@ circpad_machine_first_higher_index(const circpad_machineinfo_t *mi, * target_bin_usec, and that still has tokens remaining. */ static circpad_hist_index_t -circpad_machine_first_lower_index(const circpad_machineinfo_t *mi, +circpad_machine_first_lower_index(const circpad_machine_state_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, @@ -645,7 +646,7 @@ circpad_machine_first_lower_index(const circpad_machineinfo_t *mi, * greater than the target. */ STATIC void -circpad_machine_remove_higher_token(circpad_machineinfo_t *mi, +circpad_machine_remove_higher_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_usec) { /* We need to remove the token from the first bin @@ -666,7 +667,7 @@ circpad_machine_remove_higher_token(circpad_machineinfo_t *mi, * lower than the target. */ STATIC void -circpad_machine_remove_lower_token(circpad_machineinfo_t *mi, +circpad_machine_remove_lower_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_machine_first_lower_index(mi, @@ -695,7 +696,7 @@ circpad_machine_remove_lower_token(circpad_machineinfo_t *mi, * If it is false, use bin index distance only. 
*/ STATIC void -circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, +circpad_machine_remove_closest_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_usec, bool use_usec) { @@ -777,7 +778,7 @@ circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, * If it is empty, do nothing. */ static void -circpad_machine_remove_exact(circpad_machineinfo_t *mi, +circpad_machine_remove_exact(circpad_machine_state_t *mi, circpad_delay_t target_bin_usec) { circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi, @@ -794,7 +795,7 @@ circpad_machine_remove_exact(circpad_machineinfo_t *mi, * otherwise returns 0. */ static circpad_decision_t -check_machine_token_supply(circpad_machineinfo_t *mi) +check_machine_token_supply(circpad_machine_state_t *mi) { uint32_t histogram_total_tokens = 0; @@ -834,7 +835,7 @@ check_machine_token_supply(circpad_machineinfo_t *mi) * Returns 1 if we transition states, 0 otherwise. */ STATIC circpad_decision_t -circpad_machine_remove_token(circpad_machineinfo_t *mi) +circpad_machine_remove_token(circpad_machine_state_t *mi) { const circpad_state_t *state = NULL; circpad_time_t current_time; @@ -961,7 +962,7 @@ circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, * CIRCPAD_STATE_CHANGED. Otherwise return CIRCPAD_STATE_UNCHANGED. */ circpad_decision_t -circpad_send_padding_cell_for_callback(circpad_machineinfo_t *mi) +circpad_send_padding_cell_for_callback(circpad_machine_state_t *mi) { circuit_t *circ = mi->on_circ; int machine_idx = mi->machine_index; @@ -1040,7 +1041,7 @@ circpad_send_padding_cell_for_callback(circpad_machineinfo_t *mi) /** * Tor-timer compatible callback that tells us to send a padding cell. * - * Timers are associated with circpad_machineinfo_t's. When the machineinfo + * Timers are associated with circpad_machine_state_t's. When the machineinfo * is freed on a circuit, the timers are cancelled. 
Since the lifetime * of machineinfo is always longer than the timers, handles are not * needed. @@ -1049,7 +1050,7 @@ static void circpad_send_padding_callback(tor_timer_t *timer, void *args, const struct monotime_t *time) { - circpad_machineinfo_t *mi = ((circpad_machineinfo_t*)args); + circpad_machine_state_t *mi = ((circpad_machine_state_t*)args); (void)timer; (void)time; if (mi && mi->on_circ) { @@ -1095,7 +1096,7 @@ circpad_new_consensus_params(const networkstatus_t *ns) * Returns 1 if limits are set and we've hit them. Otherwise returns 0. */ STATIC bool -circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) +circpad_machine_reached_padding_limit(circpad_machine_state_t *mi) { const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); @@ -1143,7 +1144,7 @@ circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi) * 0 otherwise. */ MOCK_IMPL(circpad_decision_t, -circpad_machine_schedule_padding,(circpad_machineinfo_t *mi)) +circpad_machine_schedule_padding,(circpad_machine_state_t *mi)) { circpad_delay_t in_usec = 0; struct timeval timeout; @@ -1233,7 +1234,7 @@ circpad_machine_schedule_padding,(circpad_machineinfo_t *mi)) * not access it. */ static void -circpad_machine_spec_transitioned_to_end(circpad_machineinfo_t *mi) +circpad_machine_spec_transitioned_to_end(circpad_machine_state_t *mi) { const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi); @@ -1283,7 +1284,7 @@ circpad_machine_spec_transitioned_to_end(circpad_machineinfo_t *mi) * Returns 1 if we transition states, 0 otherwise. 
*/ MOCK_IMPL(circpad_decision_t, -circpad_machine_spec_transition,(circpad_machineinfo_t *mi, +circpad_machine_spec_transition,(circpad_machine_state_t *mi, circpad_event_t event)) { const circpad_state_t *state = @@ -1364,7 +1365,7 @@ circpad_machine_spec_transition,(circpad_machineinfo_t *mi, */ static void circpad_estimate_circ_rtt_on_received(circuit_t *circ, - circpad_machineinfo_t *mi) + circpad_machine_state_t *mi) { /* Origin circuits don't estimate RTT. They could do it easily enough, * but they have no reason to use it in any delay calculations. */ @@ -1411,7 +1412,7 @@ circpad_estimate_circ_rtt_on_received(circuit_t *circ, */ static void circpad_estimate_circ_rtt_on_send(circuit_t *circ, - circpad_machineinfo_t *mi) + circpad_machine_state_t *mi) { /* Origin circuits don't estimate RTT. They could do it easily enough, * but they have no reason to use it in any delay calculations. */ @@ -1556,7 +1557,7 @@ circpad_cell_event_padding_received(circuit_t *on_circ) * Return 1 if we decide to transition, 0 otherwise. */ circpad_decision_t -circpad_internal_event_infinity(circpad_machineinfo_t *mi) +circpad_internal_event_infinity(circpad_machine_state_t *mi) { return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_INFINITY); } @@ -1570,7 +1571,7 @@ circpad_internal_event_infinity(circpad_machineinfo_t *mi) * Return 1 if we decide to transition, 0 otherwise. */ circpad_decision_t -circpad_internal_event_bins_empty(circpad_machineinfo_t *mi) +circpad_internal_event_bins_empty(circpad_machine_state_t *mi) { if (circpad_machine_spec_transition(mi, CIRCPAD_EVENT_BINS_EMPTY) == CIRCPAD_STATE_CHANGED) { @@ -1589,7 +1590,7 @@ circpad_internal_event_bins_empty(circpad_machineinfo_t *mi) * Return 1 if we decide to transition, 0 otherwise. 
*/ circpad_decision_t -circpad_internal_event_state_length_up(circpad_machineinfo_t *mi) +circpad_internal_event_state_length_up(circpad_machine_state_t *mi) { return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT); } diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h index a43be58abb..f3d176ebf7 100644 --- a/src/core/or/circuitpadding.h +++ b/src/core/or/circuitpadding.h @@ -88,7 +88,7 @@ typedef uint32_t circpad_delay_t; /** * Macro to clarify when we're checking the infinity bin. * - * Works with either circpad_state_t or circpad_machineinfo_t + * Works with either circpad_state_t or circpad_machine_state_t */ #define CIRCPAD_INFINITY_BIN(mi) ((mi)->histogram_len-1) @@ -237,10 +237,10 @@ typedef uint16_t circpad_statenum_t; #define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1) /** - * A state of a padding machine. The information here are immutable and + * A state of a padding state machine. The information here are immutable and * represent the initial form of the state; it does not get updated as things * happen. The mutable information that gets updated in runtime are carried in - * a circpad_machineinfo_t. + * a circpad_machine_state_t. * * This struct describes the histograms and parameters of a single * state in the adaptive padding machine. Instances of this struct @@ -430,7 +430,7 @@ typedef struct circpad_state_t { * * XXX: Play with layout to minimize space on x64 Linux (most common relay). */ -typedef struct circpad_machineinfo_t { +typedef struct circpad_machine_state_t { /** The callback pointer for the padding callbacks. * * These timers stick around the machineinfo until the machineinfo's circuit @@ -514,7 +514,7 @@ typedef struct circpad_machineinfo_t { * CIRCPAD_MAX_MACHINES define). 
*/ unsigned machine_index : 1; -} circpad_machineinfo_t; +} circpad_machine_state_t; /** Helper macro to get an actual state machine from a machineinfo */ #define CIRCPAD_GET_MACHINE(machineinfo) \ @@ -591,10 +591,12 @@ void circpad_cell_event_padding_sent(circuit_t *on_circ); void circpad_cell_event_padding_received(circuit_t *on_circ); /** Internal events are events the machines send to themselves */ -circpad_decision_t circpad_internal_event_infinity(circpad_machineinfo_t *mi); -circpad_decision_t circpad_internal_event_bins_empty(circpad_machineinfo_t *); +circpad_decision_t +circpad_internal_event_infinity(circpad_machine_state_t *mi); +circpad_decision_t +circpad_internal_event_bins_empty(circpad_machine_state_t *); circpad_decision_t circpad_internal_event_state_length_up( - circpad_machineinfo_t *); + circpad_machine_state_t *); /** Machine creation events are events that cause us to set up or * tear down padding state machines. */ @@ -634,47 +636,47 @@ bool circpad_padding_negotiated(circuit_t *circ, uint8_t response); MOCK_DECL(circpad_decision_t, -circpad_machine_schedule_padding,(circpad_machineinfo_t *)); +circpad_machine_schedule_padding,(circpad_machine_state_t *)); MOCK_DECL(circpad_decision_t, -circpad_machine_spec_transition, (circpad_machineinfo_t *mi, +circpad_machine_spec_transition, (circpad_machine_state_t *mi, circpad_event_t event)); circpad_decision_t circpad_send_padding_cell_for_callback( - circpad_machineinfo_t *mi); + circpad_machine_state_t *mi); #ifdef CIRCUITPADDING_PRIVATE STATIC circpad_delay_t -circpad_machine_sample_delay(circpad_machineinfo_t *mi); +circpad_machine_sample_delay(circpad_machine_state_t *mi); STATIC bool -circpad_machine_reached_padding_limit(circpad_machineinfo_t *mi); +circpad_machine_reached_padding_limit(circpad_machine_state_t *mi); STATIC -circpad_decision_t circpad_machine_remove_token(circpad_machineinfo_t *mi); +circpad_decision_t circpad_machine_remove_token(circpad_machine_state_t *mi); STATIC 
circpad_delay_t -circpad_histogram_bin_to_usec(const circpad_machineinfo_t *mi, +circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi, circpad_hist_index_t bin); STATIC const circpad_state_t * -circpad_machine_current_state(const circpad_machineinfo_t *mi); +circpad_machine_current_state(const circpad_machine_state_t *mi); STATIC circpad_hist_index_t circpad_histogram_usec_to_bin( - const circpad_machineinfo_t *mi, + const circpad_machine_state_t *mi, circpad_delay_t us); -STATIC circpad_machineinfo_t *circpad_circuit_machineinfo_new( +STATIC circpad_machine_state_t *circpad_circuit_machineinfo_new( circuit_t *on_circ, int machine_index); -STATIC void circpad_machine_remove_higher_token(circpad_machineinfo_t *mi, +STATIC void circpad_machine_remove_higher_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_us); -STATIC void circpad_machine_remove_lower_token(circpad_machineinfo_t *mi, +STATIC void circpad_machine_remove_lower_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_us); -STATIC void circpad_machine_remove_closest_token(circpad_machineinfo_t *mi, +STATIC void circpad_machine_remove_closest_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_us, bool use_usec); -STATIC void circpad_machine_setup_tokens(circpad_machineinfo_t *mi); +STATIC void circpad_machine_setup_tokens(circpad_machine_state_t *mi); MOCK_DECL(STATIC signed_error_t, circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, diff --git a/src/test/test_circuitpadding.c b/src/test/test_circuitpadding.c index 2ab8e2445e..f4d003969e 100644 --- a/src/test/test_circuitpadding.c +++ b/src/test/test_circuitpadding.c @@ -479,7 +479,7 @@ helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy) } static circpad_decision_t -circpad_machine_schedule_padding_mock(circpad_machineinfo_t *mi) +circpad_machine_schedule_padding_mock(circpad_machine_state_t *mi) { (void)mi; return 0; @@ -495,7 +495,7 @@ mock_monotime_absolute_usec(void) 
static void test_circuitpadding_token_removal_higher(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /* Mock it up */ @@ -592,7 +592,7 @@ test_circuitpadding_token_removal_higher(void *arg) static void test_circuitpadding_token_removal_lower(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /* Mock it up */ @@ -689,7 +689,7 @@ test_circuitpadding_token_removal_lower(void *arg) static void test_circuitpadding_closest_token_removal(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /* Mock it up */ @@ -794,7 +794,7 @@ test_circuitpadding_closest_token_removal(void *arg) static void test_circuitpadding_closest_token_removal_usec(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /* Mock it up */ @@ -902,7 +902,7 @@ test_circuitpadding_closest_token_removal_usec(void *arg) static void test_circuitpadding_token_removal_exact(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /* Mock it up */ @@ -963,7 +963,7 @@ void test_circuitpadding_tokens(void *arg) { const circpad_state_t *state; - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; (void)arg; /** Test plan: @@ -2114,7 +2114,7 @@ helper_circpad_circ_distribution_machine_setup(int min, int max) static void test_circuitpadding_sample_distribution(void *arg) { - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; int n_samples; int n_states; @@ -2159,7 +2159,7 @@ test_circuitpadding_sample_distribution(void *arg) } static circpad_decision_t -circpad_machine_spec_transition_mock(circpad_machineinfo_t *mi, +circpad_machine_spec_transition_mock(circpad_machine_state_t *mi, circpad_event_t event) { (void) mi; @@ -2174,7 +2174,7 @@ test_circuitpadding_machine_rate_limiting(void *arg) { (void) arg; bool retval; - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; int i; /* Ignore machine transitions for the purposes of this function, we 
only @@ -2242,7 +2242,7 @@ test_circuitpadding_global_rate_limiting(void *arg) { (void) arg; bool retval; - circpad_machineinfo_t *mi; + circpad_machine_state_t *mi; int i; /* Ignore machine transitions for the purposes of this function, we only -- cgit v1.2.3-54-g00ecf From e0e0338dc42ed786979759d56e0b65f129a5df8c Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 12:54:55 +0200 Subject: Rename crypto_rand_uint32() -> crypto_rand_u32() See https://github.com/torproject/tor/pull/624#discussion_r246453777 --- src/lib/crypt_ops/crypto_rand.c | 2 +- src/lib/crypt_ops/crypto_rand.h | 2 +- src/lib/math/prob_distr.c | 16 ++++++++-------- src/test/test_prob_distr.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/lib/crypt_ops/crypto_rand.c b/src/lib/crypt_ops/crypto_rand.c index 7a2c417e5a..d148dfb3a8 100644 --- a/src/lib/crypt_ops/crypto_rand.c +++ b/src/lib/crypt_ops/crypto_rand.c @@ -532,7 +532,7 @@ crypto_rand_unmocked(char *to, size_t n) * Draw an unsigned 32-bit integer uniformly at random. */ uint32_t -crypto_rand_uint32(void) +crypto_rand_u32(void) { uint32_t rand; crypto_rand((void*)&rand, sizeof(rand)); diff --git a/src/lib/crypt_ops/crypto_rand.h b/src/lib/crypt_ops/crypto_rand.h index 61fd82c806..874fcd4d08 100644 --- a/src/lib/crypt_ops/crypto_rand.h +++ b/src/lib/crypt_ops/crypto_rand.h @@ -27,7 +27,7 @@ int crypto_rand_int(unsigned int max); int crypto_rand_int_range(unsigned int min, unsigned int max); uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max); time_t crypto_rand_time_range(time_t min, time_t max); -uint32_t crypto_rand_uint32(void); +uint32_t crypto_rand_u32(void); uint64_t crypto_rand_uint64(uint64_t max); double crypto_rand_double(void); struct tor_weak_rng_t; diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c index 832d3b4d96..f5e5218aa7 100644 --- a/src/lib/math/prob_distr.c +++ b/src/lib/math/prob_distr.c @@ -458,7 +458,7 @@ random_uniform_01(void) * system is broken. 
*/ z = 0; - while ((x = crypto_rand_uint32()) == 0) { + while ((x = crypto_rand_u32()) == 0) { if (z >= 1088) /* Your bit sampler is broken. Go home. */ return 0; @@ -472,8 +472,8 @@ random_uniform_01(void) * occur only with measure zero in the uniform distribution on * [0, 1]. */ - hi = crypto_rand_uint32() | UINT32_C(0x80000000); - lo = crypto_rand_uint32() | UINT32_C(0x00000001); + hi = crypto_rand_u32() | UINT32_C(0x80000000); + lo = crypto_rand_u32() | UINT32_C(0x00000001); /* Round to nearest scaled significand in [2^63, 2^64]. */ s = hi*(double)4294967296 + lo; @@ -1402,7 +1402,7 @@ logistic_sample(const struct dist *dist) { const struct logistic *L = const_container_of(dist, struct logistic, base); - uint32_t s = crypto_rand_uint32(); + uint32_t s = crypto_rand_u32(); double t = random_uniform_01(); double p0 = random_uniform_01(); @@ -1460,7 +1460,7 @@ log_logistic_sample(const struct dist *dist) { const struct log_logistic *LL = const_container_of(dist, struct log_logistic, base); - uint32_t s = crypto_rand_uint32(); + uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta); @@ -1517,7 +1517,7 @@ weibull_sample(const struct dist *dist) { const struct weibull *W = const_container_of(dist, struct weibull, base); - uint32_t s = crypto_rand_uint32(); + uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); return sample_weibull(s, p0, W->lambda, W->k); @@ -1574,7 +1574,7 @@ genpareto_sample(const struct dist *dist) { const struct genpareto *GP = const_container_of(dist, struct genpareto, base); - uint32_t s = crypto_rand_uint32(); + uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi); @@ -1621,7 +1621,7 @@ genpareto_isf(const struct dist *dist, double p) double geometric_sample(double p) { - uint32_t s = crypto_rand_uint32(); + uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); 
return sample_geometric(s, p0, p); } diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c index bf0f9e059d..75e7e360a6 100644 --- a/src/test/test_prob_distr.c +++ b/src/test/test_prob_distr.c @@ -1107,7 +1107,7 @@ static uint32_t deterministic_rand_counter; static void init_deterministic_rand(void) { - deterministic_rand_counter = crypto_rand_uint32(); + deterministic_rand_counter = crypto_rand_u32(); } /** Produce deterministic randomness for the stochastic tests using the global -- cgit v1.2.3-54-g00ecf From 4db9c3d63e93912a82eabeb8a9fb851d5196c3c8 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 13:03:41 +0200 Subject: Unittest for tor_isinf(). --- src/test/test_util.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/test/test_util.c b/src/test/test_util.c index bf64cff7ef..e8233d9d30 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -70,6 +70,28 @@ #define INFINITY_DBL ((double)INFINITY) #define NAN_DBL ((double)NAN) +/** Test the tor_isinf() wrapper */ +static void +test_tor_isinf(void *arg) +{ + (void) arg; + + tt_assert(tor_isinf(INFINITY_DBL)); + + tt_assert(!tor_isinf(NAN_DBL)); + tt_assert(!tor_isinf(DBL_EPSILON)); + tt_assert(!tor_isinf(DBL_MAX)); + tt_assert(!tor_isinf(DBL_MIN)); + + tt_assert(!tor_isinf(0.0)); + tt_assert(!tor_isinf(0.1)); + tt_assert(!tor_isinf(3)); + tt_assert(!tor_isinf(3.14)); + + done: + ; +} + /* XXXX this is a minimal wrapper to make the unit tests compile with the * changed tor_timegm interface. 
*/ static time_t @@ -6191,6 +6213,7 @@ struct testcase_t util_tests[] = { UTIL_TEST(mathlog, 0), UTIL_TEST(fraction, 0), UTIL_TEST(weak_random, 0), + { "tor_isinf", test_tor_isinf, TT_FORK, NULL, NULL }, { "socket_ipv4", test_util_socket, TT_FORK, &passthrough_setup, (void*)"4" }, { "socket_ipv6", test_util_socket, TT_FORK, -- cgit v1.2.3-54-g00ecf From 2b29bccb8553c4b80eccd3438b4b1463687b1d4c Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 10 Jan 2019 13:05:05 +0200 Subject: Misc trivial improvements around circpadding code. --- doc/tor.1.txt | 5 +++++ src/core/or/circuitpadding.c | 1 + src/core/or/relay.c | 1 + 3 files changed, 7 insertions(+) diff --git a/doc/tor.1.txt b/doc/tor.1.txt index 455356163c..6dbd4af377 100644 --- a/doc/tor.1.txt +++ b/doc/tor.1.txt @@ -1025,6 +1025,11 @@ The following options are useful only for clients (that is, if to use for "middle" hops in your normal circuits. Normal circuits include all circuits except for direct connections to directory servers. Middle hops are all hops other than exit and entry. + ++ + This is an **experimental** feature that is meant to be used by researchers + and developers to test new features in the Tor network safely. Using it + without care will strongly influence your anonymity. This feature might get + removed in the future. + The HSLayer2Node and HSLayer3Node options override this option for onion service circuits, if they are set. The vanguards addon will read this diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 6f10d3fb6a..6a39a7b371 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -304,6 +304,7 @@ circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi, else start_usec = state->start_usec; + /* The first bin (#0) has zero width and starts (and ends) at start_usec. 
*/ if (usec <= start_usec) return 0; diff --git a/src/core/or/relay.c b/src/core/or/relay.c index 9c0f3bbbe3..00c2111955 100644 --- a/src/core/or/relay.c +++ b/src/core/or/relay.c @@ -527,6 +527,7 @@ relay_command_to_string(uint8_t command) case RELAY_COMMAND_EXTEND2: return "EXTEND2"; case RELAY_COMMAND_EXTENDED2: return "EXTENDED2"; case RELAY_COMMAND_PADDING_NEGOTIATE: return "PADDING_NEGOTIATE"; + case RELAY_COMMAND_PADDING_NEGOTIATED: return "PADDING_NEGOTIATED"; default: tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u", (unsigned)command); -- cgit v1.2.3-54-g00ecf From ca544246020cddfee3f7fd46899dcf3a9382eb62 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 17:10:39 +0000 Subject: Fix type redefinition errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from src/core/or/connection_edge.c:70:0: ./src/core/or/circuitpadding.h:16:26: error: redefinition of typedef ‘circuit_t’ ./src/core/or/or.h:930:26: note: previous declaration of ‘circuit_t’ was here ./src/core/or/circuitpadding.h:17:33: error: redefinition of typedef ‘origin_circuit_t’ ./src/core/or/or.h:931:33: note: previous declaration of ‘origin_circuit_t’ was here ./src/core/or/circuitpadding.h:18:23: error: redefinition of typedef ‘cell_t’ ./src/core/or/or.h:628:23: note: previous declaration of ‘cell_t’ was here typedef doesn't work for forward declarations, but plain struct without a typedef wrapper does (and unlike the _t type aliases makes it clearer for everyone whether you're talking about the struct or the pointer). 
--- src/core/or/circuitpadding.h | 53 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/src/core/or/circuitpadding.h b/src/core/or/circuitpadding.h index f3d176ebf7..628f27ec11 100644 --- a/src/core/or/circuitpadding.h +++ b/src/core/or/circuitpadding.h @@ -13,9 +13,9 @@ #include "src/trunnel/circpad_negotiation.h" #include "lib/evloop/timers.h" -typedef struct circuit_t circuit_t; -typedef struct origin_circuit_t origin_circuit_t; -typedef struct cell_t cell_t; +struct circuit_t; +struct origin_circuit_t; +struct cell_t; /** * Signed error return with the specific property that negative @@ -440,7 +440,7 @@ typedef struct circpad_machine_state_t { tor_timer_t *padding_timer; /** The circuit for this machine */ - circuit_t *on_circ; + struct circuit_t *on_circ; /** A mutable copy of the histogram for the current state. * NULL if remove_tokens is false for that state */ @@ -576,19 +576,19 @@ void circpad_new_consensus_params(const networkstatus_t *ns); /** * The following are event call-in points that are of interest to * the state machines. They are called during cell processing. 
*/ -void circpad_deliver_unrecognized_cell_events(circuit_t *circ, +void circpad_deliver_unrecognized_cell_events(struct circuit_t *circ, cell_direction_t dir); -void circpad_deliver_sent_relay_cell_events(circuit_t *circ, +void circpad_deliver_sent_relay_cell_events(struct circuit_t *circ, uint8_t relay_command); -void circpad_deliver_recognized_relay_cell_events(circuit_t *circ, +void circpad_deliver_recognized_relay_cell_events(struct circuit_t *circ, uint8_t relay_command, crypt_path_t *layer_hint); /** Cell events are delivered by the above delivery functions */ -void circpad_cell_event_nonpadding_sent(circuit_t *on_circ); -void circpad_cell_event_nonpadding_received(circuit_t *on_circ); -void circpad_cell_event_padding_sent(circuit_t *on_circ); -void circpad_cell_event_padding_received(circuit_t *on_circ); +void circpad_cell_event_nonpadding_sent(struct circuit_t *on_circ); +void circpad_cell_event_nonpadding_received(struct circuit_t *on_circ); +void circpad_cell_event_padding_sent(struct circuit_t *on_circ); +void circpad_cell_event_padding_received(struct circuit_t *on_circ); /** Internal events are events the machines send to themselves */ circpad_decision_t @@ -600,12 +600,13 @@ circpad_decision_t circpad_internal_event_state_length_up( /** Machine creation events are events that cause us to set up or * tear down padding state machines. 
*/ -void circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ); -void circpad_machine_event_circ_built(origin_circuit_t *circ); -void circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ); -void circpad_machine_event_circ_has_streams(origin_circuit_t *circ); -void circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ); -void circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ); +void circpad_machine_event_circ_added_hop(struct origin_circuit_t *on_circ); +void circpad_machine_event_circ_built(struct origin_circuit_t *circ); +void circpad_machine_event_circ_purpose_changed(struct origin_circuit_t *circ); +void circpad_machine_event_circ_has_streams(struct origin_circuit_t *circ); +void circpad_machine_event_circ_has_no_streams(struct origin_circuit_t *circ); +void +circpad_machine_event_circ_has_no_relay_early(struct origin_circuit_t *circ); void circpad_machines_init(void); void circpad_machines_free(void); @@ -613,9 +614,9 @@ void circpad_machines_free(void); void circpad_machine_states_init(circpad_machine_spec_t *machine, circpad_statenum_t num_states); -void circpad_circuit_free_all_machineinfos(circuit_t *circ); +void circpad_circuit_free_all_machineinfos(struct circuit_t *circ); -bool circpad_padding_is_from_expected_hop(circuit_t *circ, +bool circpad_padding_is_from_expected_hop(struct circuit_t *circ, crypt_path_t *from_hop); /** Serializaton functions for writing to/from torrc and consensus */ @@ -623,14 +624,16 @@ char *circpad_machine_spec_to_string(const circpad_machine_spec_t *machine); const circpad_machine_spec_t *circpad_string_to_machine(const char *str); /* Padding negotiation between client and middle */ -signed_error_t circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell); -signed_error_t circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell, +signed_error_t circpad_handle_padding_negotiate(struct circuit_t *circ, + struct cell_t *cell); +signed_error_t 
circpad_handle_padding_negotiated(struct circuit_t *circ, + struct cell_t *cell, crypt_path_t *layer_hint); -signed_error_t circpad_negotiate_padding(origin_circuit_t *circ, +signed_error_t circpad_negotiate_padding(struct origin_circuit_t *circ, circpad_machine_num_t machine, uint8_t target_hopnum, uint8_t command); -bool circpad_padding_negotiated(circuit_t *circ, +bool circpad_padding_negotiated(struct circuit_t *circ, circpad_machine_num_t machine, uint8_t command, uint8_t response); @@ -667,7 +670,7 @@ STATIC circpad_hist_index_t circpad_histogram_usec_to_bin( circpad_delay_t us); STATIC circpad_machine_state_t *circpad_circuit_machineinfo_new( - circuit_t *on_circ, + struct circuit_t *on_circ, int machine_index); STATIC void circpad_machine_remove_higher_token(circpad_machine_state_t *mi, circpad_delay_t target_bin_us); @@ -679,7 +682,7 @@ STATIC void circpad_machine_remove_closest_token(circpad_machine_state_t *mi, STATIC void circpad_machine_setup_tokens(circpad_machine_state_t *mi); MOCK_DECL(STATIC signed_error_t, -circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum, +circpad_send_command_to_hop,(struct origin_circuit_t *circ, uint8_t hopnum, uint8_t relay_command, const uint8_t *payload, ssize_t payload_len)); -- cgit v1.2.3-54-g00ecf From 948856c03ef417cc9dad9ef85b7bb5c164edb742 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 17:12:32 +0000 Subject: Fix more type redefinition errors. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In file included from ./src/core/or/or_circuit_st.h:12:0, from src/core/or/circuitlist.c:112: ./src/core/or/circuit_st.h:15:39: error: redefinition of typedef ‘circpad_machine_spec_t’ ./src/core/or/circuitpadding.h:572:3: note: previous declaration of ‘circpad_machine_spec_t’ was here ./src/core/or/circuit_st.h:16:40: error: redefinition of typedef ‘circpad_machine_state_t’ ./src/core/or/circuitpadding.h:517:3: note: previous declaration of ‘circpad_machine_state_t’ was here --- src/core/or/circuit_st.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index bfbd336c3a..47639940d4 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -12,8 +12,8 @@ #include "core/or/cell_queue_st.h" struct hs_token_t; -typedef struct circpad_machine_spec_t circpad_machine_spec_t; -typedef struct circpad_machine_state_t circpad_machine_state_t; +struct circpad_machine_spec_t; +struct circpad_machine_state_t; /** Number of padding state machines on a circuit. */ #define CIRCPAD_MAX_MACHINES (2) @@ -189,7 +189,7 @@ struct circuit_t { * * Each element of this array corresponds to a different padding machine, * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ - const circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES]; + const struct circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES]; /** Adaptive Padding machine info for above machines. This is the * per-circuit mutable information, such as the current state and @@ -200,7 +200,7 @@ struct circuit_t { * * Each element of this array corresponds to a different padding machine, * and we can have up to CIRCPAD_MAX_MACHINES such machines. 
*/ - circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES]; + struct circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES]; }; #endif -- cgit v1.2.3-54-g00ecf From 531df9590d006434b31cc81871b73c31ca9f896b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 17:12:56 +0000 Subject: Move ceil call back into the geometric sampler. Test exactly what the geometric sampler returns, because that's what the downstream callers of it are going to use. While here, also assert that the geometric sampler returns a positive integer. (Our geometric distribution is the one supported on {1, 2, 3, ...} that returns the number of trials before the first success, not the one supported on {0, 1, 2, ...} that returns the number of failures before the first success.) --- src/lib/math/prob_distr.c | 2 +- src/test/test_prob_distr.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c index f5e5218aa7..e170d000fe 100644 --- a/src/lib/math/prob_distr.c +++ b/src/lib/math/prob_distr.c @@ -1308,7 +1308,7 @@ sample_geometric(uint32_t s, double p0, double p) if (p >= 1) return 1; - return (-x/log1p(-p)); + return ceil(-x/log1p(-p)); } /*******************************************************************/ diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c index 75e7e360a6..ec4e943e9a 100644 --- a/src/test/test_prob_distr.c +++ b/src/test/test_prob_distr.c @@ -958,7 +958,17 @@ test_stochastic_geometric_impl(double p) size_t C[PSI_DF] = {0}; for (j = 0; j < NSAMPLES; j++) { - double n_tmp = ceil(geometric_sample(p)); + double n_tmp = geometric_sample(p); + + /* Must be an integer. (XXX -Wfloat-equal) */ + tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp); + + /* Must be a positive integer. */ + tor_assert(n_tmp >= 1); + + /* Probability of getting a value in the billions is negligible.
*/ + tor_assert(n_tmp <= (double)UINT_MAX); + unsigned n = (unsigned) n_tmp; if (n > PSI_DF) -- cgit v1.2.3-54-g00ecf From 0f8253bddbaae4e73fe2ff9ecf1c342e3f66b798 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 17:40:17 +0000 Subject: Use the distribution abstraction as an abstraction. --- src/core/or/circuitpadding.c | 16 +-- src/lib/math/prob_distr.c | 229 ++++++++++++++++++++++++++++++------------- src/lib/math/prob_distr.h | 46 +++------ src/test/test_prob_distr.c | 22 +++-- 4 files changed, 196 insertions(+), 117 deletions(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 6a39a7b371..a5d5d24551 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -545,7 +545,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .a = dist.param1, .b = dist.param2, }; - return uniform_sample(&my_uniform.base); + return dist_sample(&my_uniform.base); } case CIRCPAD_DIST_LOGISTIC: { @@ -555,7 +555,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .mu = dist.param1, .sigma = dist.param2, }; - return logistic_sample(&my_logistic.base); + return dist_sample(&my_logistic.base); } case CIRCPAD_DIST_LOG_LOGISTIC: { @@ -565,12 +565,16 @@ circpad_distribution_sample(circpad_distribution_t dist) .alpha = dist.param1, .beta = dist.param2, }; - return log_logistic_sample(&my_log_logistic.base); + return dist_sample(&my_log_logistic.base); } case CIRCPAD_DIST_GEOMETRIC: { /* param1 is 'p' (success probability) */ - return geometric_sample(dist.param1); + const struct geometric my_geometric = { + .base = DIST_BASE(&geometric_ops), + .p = dist.param1, + }; + return dist_sample(&my_geometric.base); } case CIRCPAD_DIST_WEIBULL: { @@ -580,7 +584,7 @@ circpad_distribution_sample(circpad_distribution_t dist) .k = dist.param1, .lambda = dist.param2, }; - return weibull_sample(&my_weibull.base); + return dist_sample(&my_weibull.base); } case CIRCPAD_DIST_PARETO: { @@ -591,7 +595,7 @@ 
circpad_distribution_sample(circpad_distribution_t dist) .sigma = dist.param1, .xi = dist.param2, }; - return genpareto_sample(&my_genpareto.base); + return dist_sample(&my_genpareto.base); } } diff --git a/src/lib/math/prob_distr.c b/src/lib/math/prob_distr.c index e170d000fe..4263ba2074 100644 --- a/src/lib/math/prob_distr.c +++ b/src/lib/math/prob_distr.c @@ -1319,17 +1319,45 @@ sample_geometric(uint32_t s, double p0, double p) * (sample/cdf/sf/icdf/isf) as part of its dist_ops structure. */ -/** Functions for uniform distribution */ -const struct dist_ops uniform_ops = { - .name = "uniform", - .sample = uniform_sample, - .cdf = uniform_cdf, - .sf = uniform_sf, - .icdf = uniform_icdf, - .isf = uniform_isf, -}; +const char * +dist_name(const struct dist *dist) +{ + return dist->ops->name; +} + +double +dist_sample(const struct dist *dist) +{ + return dist->ops->sample(dist); +} + +double +dist_cdf(const struct dist *dist, double x) +{ + return dist->ops->cdf(dist, x); +} + +double +dist_sf(const struct dist *dist, double x) +{ + return dist->ops->sf(dist, x); +} double +dist_icdf(const struct dist *dist, double p) +{ + return dist->ops->icdf(dist, p); +} + +double +dist_isf(const struct dist *dist, double p) +{ + return dist->ops->isf(dist, p); +} + +/** Functions for uniform distribution */ + +static double uniform_sample(const struct dist *dist) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1339,7 +1367,7 @@ uniform_sample(const struct dist *dist) return sample_uniform_interval(p0, U->a, U->b); } -double +static double uniform_cdf(const struct dist *dist, double x) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1353,7 +1381,7 @@ uniform_cdf(const struct dist *dist, double x) return 1; } -double +static double uniform_sf(const struct dist *dist, double x) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1367,7 +1395,7 @@ uniform_sf(const struct dist *dist, double x) return 1; } -double 
+static double uniform_icdf(const struct dist *dist, double p) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1377,7 +1405,7 @@ uniform_icdf(const struct dist *dist, double p) return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p))); } -double +static double uniform_isf(const struct dist *dist, double p) { const struct uniform *U = const_container_of(dist, struct uniform, @@ -1387,17 +1415,18 @@ uniform_isf(const struct dist *dist, double p) return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p))); } -/** Functions for logistic distribution: */ -const struct dist_ops logistic_ops = { - .name = "logistic", - .sample = logistic_sample, - .cdf = logistic_cdf, - .sf = logistic_sf, - .icdf = logistic_icdf, - .isf = logistic_isf, +const struct dist_ops uniform_ops = { + .name = "uniform", + .sample = uniform_sample, + .cdf = uniform_cdf, + .sf = uniform_sf, + .icdf = uniform_icdf, + .isf = uniform_isf, }; -double +/** Functions for logistic distribution: */ + +static double logistic_sample(const struct dist *dist) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1409,7 +1438,7 @@ logistic_sample(const struct dist *dist) return sample_logistic_locscale(s, t, p0, L->mu, L->sigma); } -double +static double logistic_cdf(const struct dist *dist, double x) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1418,7 +1447,7 @@ logistic_cdf(const struct dist *dist, double x) return cdf_logistic(x, L->mu, L->sigma); } -double +static double logistic_sf(const struct dist *dist, double x) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1427,7 +1456,7 @@ logistic_sf(const struct dist *dist, double x) return sf_logistic(x, L->mu, L->sigma); } -double +static double logistic_icdf(const struct dist *dist, double p) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1436,7 +1465,7 @@ logistic_icdf(const struct dist *dist, double p) return icdf_logistic(p, L->mu, 
L->sigma); } -double +static double logistic_isf(const struct dist *dist, double p) { const struct logistic *L = const_container_of(dist, struct logistic, @@ -1445,17 +1474,18 @@ logistic_isf(const struct dist *dist, double p) return isf_logistic(p, L->mu, L->sigma); } -/** Functions for log-logistic distribution: */ -const struct dist_ops log_logistic_ops = { - .name = "log logistic", - .sample = log_logistic_sample, - .cdf = log_logistic_cdf, - .sf = log_logistic_sf, - .icdf = log_logistic_icdf, - .isf = log_logistic_isf, +const struct dist_ops logistic_ops = { + .name = "logistic", + .sample = logistic_sample, + .cdf = logistic_cdf, + .sf = logistic_sf, + .icdf = logistic_icdf, + .isf = logistic_isf, }; -double +/** Functions for log-logistic distribution: */ + +static double log_logistic_sample(const struct dist *dist) { const struct log_logistic *LL = const_container_of(dist, struct @@ -1466,7 +1496,7 @@ log_logistic_sample(const struct dist *dist) return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta); } -double +static double log_logistic_cdf(const struct dist *dist, double x) { const struct log_logistic *LL = const_container_of(dist, @@ -1475,7 +1505,7 @@ log_logistic_cdf(const struct dist *dist, double x) return cdf_log_logistic(x, LL->alpha, LL->beta); } -double +static double log_logistic_sf(const struct dist *dist, double x) { const struct log_logistic *LL = const_container_of(dist, @@ -1484,7 +1514,7 @@ log_logistic_sf(const struct dist *dist, double x) return sf_log_logistic(x, LL->alpha, LL->beta); } -double +static double log_logistic_icdf(const struct dist *dist, double p) { const struct log_logistic *LL = const_container_of(dist, @@ -1493,7 +1523,7 @@ log_logistic_icdf(const struct dist *dist, double p) return icdf_log_logistic(p, LL->alpha, LL->beta); } -double +static double log_logistic_isf(const struct dist *dist, double p) { const struct log_logistic *LL = const_container_of(dist, @@ -1502,17 +1532,18 @@ log_logistic_isf(const 
struct dist *dist, double p) return isf_log_logistic(p, LL->alpha, LL->beta); } -/** Functions for Weibull distribution */ -const struct dist_ops weibull_ops = { - .name = "Weibull", - .sample = weibull_sample, - .cdf = weibull_cdf, - .sf = weibull_sf, - .icdf = weibull_icdf, - .isf = weibull_isf, +const struct dist_ops log_logistic_ops = { + .name = "log logistic", + .sample = log_logistic_sample, + .cdf = log_logistic_cdf, + .sf = log_logistic_sf, + .icdf = log_logistic_icdf, + .isf = log_logistic_isf, }; -double +/** Functions for Weibull distribution */ + +static double weibull_sample(const struct dist *dist) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1523,7 +1554,7 @@ weibull_sample(const struct dist *dist) return sample_weibull(s, p0, W->lambda, W->k); } -double +static double weibull_cdf(const struct dist *dist, double x) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1532,7 +1563,7 @@ weibull_cdf(const struct dist *dist, double x) return cdf_weibull(x, W->lambda, W->k); } -double +static double weibull_sf(const struct dist *dist, double x) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1541,7 +1572,7 @@ weibull_sf(const struct dist *dist, double x) return sf_weibull(x, W->lambda, W->k); } -double +static double weibull_icdf(const struct dist *dist, double p) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1550,7 +1581,7 @@ weibull_icdf(const struct dist *dist, double p) return icdf_weibull(p, W->lambda, W->k); } -double +static double weibull_isf(const struct dist *dist, double p) { const struct weibull *W = const_container_of(dist, struct weibull, @@ -1559,17 +1590,18 @@ weibull_isf(const struct dist *dist, double p) return isf_weibull(p, W->lambda, W->k); } -/** Functions for generalized Pareto distributions */ -const struct dist_ops genpareto_ops = { - .name = "generalized Pareto", - .sample = genpareto_sample, - .cdf = genpareto_cdf, - .sf = 
genpareto_sf, - .icdf = genpareto_icdf, - .isf = genpareto_isf, +const struct dist_ops weibull_ops = { + .name = "Weibull", + .sample = weibull_sample, + .cdf = weibull_cdf, + .sf = weibull_sf, + .icdf = weibull_icdf, + .isf = weibull_isf, }; -double +/** Functions for generalized Pareto distributions */ + +static double genpareto_sample(const struct dist *dist) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1580,7 +1612,7 @@ genpareto_sample(const struct dist *dist) return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi); } -double +static double genpareto_cdf(const struct dist *dist, double x) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1589,7 +1621,7 @@ genpareto_cdf(const struct dist *dist, double x) return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi); } -double +static double genpareto_sf(const struct dist *dist, double x) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1598,7 +1630,7 @@ genpareto_sf(const struct dist *dist, double x) return sf_genpareto(x, GP->mu, GP->sigma, GP->xi); } -double +static double genpareto_icdf(const struct dist *dist, double p) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1607,7 +1639,7 @@ genpareto_icdf(const struct dist *dist, double p) return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi); } -double +static double genpareto_isf(const struct dist *dist, double p) { const struct genpareto *GP = const_container_of(dist, struct genpareto, @@ -1616,13 +1648,70 @@ genpareto_isf(const struct dist *dist, double p) return isf_genpareto(p, GP->mu, GP->sigma, GP->xi); } -/* Deterministically sample from the geometric distribution with - * per-trial success probability p. 
*/ -double -geometric_sample(double p) +const struct dist_ops genpareto_ops = { + .name = "generalized Pareto", + .sample = genpareto_sample, + .cdf = genpareto_cdf, + .sf = genpareto_sf, + .icdf = genpareto_icdf, + .isf = genpareto_isf, +}; + +/** Functions for geometric distribution on number of trials before success */ + +static double +geometric_sample(const struct dist *dist) { + const struct geometric *G = const_container_of(dist, struct geometric, base); uint32_t s = crypto_rand_u32(); double p0 = random_uniform_01(); - return sample_geometric(s, p0, p); + + return sample_geometric(s, p0, G->p); } +static double +geometric_cdf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 0; + /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */ + return -expm1(floor(x)*log1p(-G->p)); +} + +static double +geometric_sf(const struct dist *dist, double x) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + if (x < 1) + return 0; + /* (1 - p)^floor(x) = e^{floor(x) log(1 - p)} */ + return exp(floor(x)*log1p(-G->p)); +} + +static double +geometric_icdf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log1p(-p)/log1p(-G->p); +} + +static double +geometric_isf(const struct dist *dist, double p) +{ + const struct geometric *G = const_container_of(dist, struct geometric, base); + + return log(p)/log1p(-G->p); +} + +const struct dist_ops geometric_ops = { + .name = "geometric (1-based)", + .sample = geometric_sample, + .cdf = geometric_cdf, + .sf = geometric_sf, + .icdf = geometric_icdf, + .isf = geometric_isf, +}; diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h index c2fd6c74b3..981fc2017d 100644 --- a/src/lib/math/prob_distr.h +++ b/src/lib/math/prob_distr.h @@ -21,6 +21,13 @@ struct dist { #define DIST_BASE(OPS) { .ops = (OPS) } +const char
*dist_name(const struct dist *); +double dist_sample(const struct dist *); +double dist_cdf(const struct dist *, double x); +double dist_sf(const struct dist *, double x); +double dist_icdf(const struct dist *, double p); +double dist_isf(const struct dist *, double p); + struct dist_ops { const char *name; double (*sample)(const struct dist *); @@ -30,9 +37,14 @@ struct dist_ops { double (*isf)(const struct dist *, double p); }; -/* Geometric distribution */ +/* Geometric distribution on positive number of trials before first success */ -double geometric_sample(double p); +struct geometric { + struct dist base; + double p; /* success probability */ +}; + +extern const struct dist_ops geometric_ops; /* Pareto distribution */ @@ -43,12 +55,6 @@ struct genpareto { double xi; }; -double genpareto_sample(const struct dist *dist); -double genpareto_cdf(const struct dist *dist, double x); -double genpareto_sf(const struct dist *dist, double x); -double genpareto_icdf(const struct dist *dist, double p); -double genpareto_isf(const struct dist *dist, double p); - extern const struct dist_ops genpareto_ops; /* Weibull distribution */ @@ -59,12 +65,6 @@ struct weibull { double k; }; -double weibull_sample(const struct dist *dist); -double weibull_cdf(const struct dist *dist, double x); -double weibull_sf(const struct dist *dist, double x); -double weibull_icdf(const struct dist *dist, double p); -double weibull_isf(const struct dist *dist, double p); - extern const struct dist_ops weibull_ops; /* Log-logistic distribution */ @@ -75,12 +75,6 @@ struct log_logistic { double beta; }; -double log_logistic_sample(const struct dist *dist); -double log_logistic_cdf(const struct dist *dist, double x); -double log_logistic_sf(const struct dist *dist, double x); -double log_logistic_icdf(const struct dist *dist, double p); -double log_logistic_isf(const struct dist *dist, double p); - extern const struct dist_ops log_logistic_ops; /* Logistic distribution */ @@ -91,12 +85,6 @@ struct 
logistic { double sigma; }; -double logistic_sample(const struct dist *dist); -double logistic_cdf(const struct dist *dist, double x); -double logistic_sf(const struct dist *dist, double x); -double logistic_icdf(const struct dist *dist, double p); -double logistic_isf(const struct dist *dist, double p); - extern const struct dist_ops logistic_ops; /* Uniform distribution */ @@ -107,12 +95,6 @@ struct uniform { double b; }; -double uniform_sample(const struct dist *dist); -double uniform_cdf(const struct dist *dist, double x); -double uniform_sf(const struct dist *dist, double x); -double uniform_icdf(const struct dist *dist, double p); -double uniform_isf(const struct dist *dist, double p); - extern const struct dist_ops uniform_ops; /** Only by unittests */ diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c index ec4e943e9a..fe3969518c 100644 --- a/src/test/test_prob_distr.c +++ b/src/test/test_prob_distr.c @@ -942,6 +942,10 @@ psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N) static bool test_stochastic_geometric_impl(double p) { + const struct geometric geometric = { + .base = DIST_BASE(&geometric_ops), + .p = p, + }; double logP[PSI_DF] = {0}; unsigned ntry = NTRIALS, npass = 0; unsigned i; @@ -958,7 +962,7 @@ test_stochastic_geometric_impl(double p) size_t C[PSI_DF] = {0}; for (j = 0; j < NSAMPLES; j++) { - double n_tmp = geometric_sample(p); + double n_tmp = dist_sample(&geometric.base); /* Must be an integer. 
(XXX -Wfloat-equal) */ tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp); @@ -1006,10 +1010,10 @@ test_stochastic_geometric_impl(double p) static void bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n) { -#define CDF(x) dist->ops->cdf(dist, x) -#define SF(x) dist->ops->sf(dist, x) +#define CDF(x) dist_cdf(dist, x) +#define SF(x) dist_sf(dist, x) const double w = (hi - lo)/(n - 2); - double halfway = dist->ops->icdf(dist, 0.5); + double halfway = dist_icdf(dist, 0.5); double x_0, x_1; size_t i; size_t n2 = ceil_to_size_t((halfway - lo)/w); @@ -1057,7 +1061,7 @@ bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n) size_t i; for (i = 0; i < NSAMPLES; i++) { - double x = dist->ops->sample(dist); + double x = dist_sample(dist); size_t bin; if (x < lo) @@ -1084,8 +1088,8 @@ test_psi_dist_sample(const struct dist *dist) { double logP[PSI_DF] = {0}; unsigned ntry = NTRIALS, npass = 0; - double lo = dist->ops->icdf(dist, 1/(double)(PSI_DF + 2)); - double hi = dist->ops->isf(dist, 1/(double)(PSI_DF + 2)); + double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2)); + double hi = dist_isf(dist, 1/(double)(PSI_DF + 2)); /* Create the null hypothesis in logP */ bin_cdfs(dist, lo, hi, logP, PSI_DF); @@ -1102,10 +1106,10 @@ test_psi_dist_sample(const struct dist *dist) /* Did we fail or succeed? */ if (npass >= NPASSES_MIN) { - /* printf("pass %s sampler\n", dist->ops->name);*/ + /* printf("pass %s sampler\n", dist_name(dist));*/ return true; } else { - printf("fail %s sampler\n", dist->ops->name); + printf("fail %s sampler\n", dist_name(dist)); return false; } } -- cgit v1.2.3-54-g00ecf From d82a8a7f9d268728b2447b2dbbaa346140784f9b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 18:08:20 +0000 Subject: Add some more type checking. NOTE: This commit breaks the build, because there was a mistake in an earlier change of exactly the sort that this is meant to detect! 
I'm leaving it broken for illustration. --- src/core/or/circuitpadding.c | 12 ++++++------ src/lib/math/prob_distr.h | 20 ++++++++++++++++++++ src/test/test_prob_distr.c | 22 +++++++++++----------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index a5d5d24551..4080614338 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -541,7 +541,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { // param2 is upper bound, param1 is lower const struct uniform my_uniform = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(my_uniform), .a = dist.param1, .b = dist.param2, }; @@ -551,7 +551,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is Mu, param2 is sigma. */ const struct logistic my_logistic = { - .base = DIST_BASE(&uniform_ops), + .base = LOGISTIC(my_uniform), .mu = dist.param1, .sigma = dist.param2, }; @@ -561,7 +561,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is Alpha, param2 is 1.0/Beta */ const struct log_logistic my_log_logistic = { - .base = DIST_BASE(&log_logistic_ops), + .base = LOG_LOGISTIC(my_log_logistic), .alpha = dist.param1, .beta = dist.param2, }; @@ -571,7 +571,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is 'p' (success probability) */ const struct geometric my_geometric = { - .base = DIST_BASE(&geometric_ops), + .base = GEOMETRIC(my_geometric), .p = dist.param1, }; return dist_sample(&my_geometric.base); @@ -580,7 +580,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is k, param2 is Lambda */ const struct weibull my_weibull = { - .base = DIST_BASE(&weibull_ops), + .base = WEIBULL(my_weibull), .k = dist.param1, .lambda = dist.param2, }; @@ -590,7 +590,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */ const struct genpareto my_genpareto = { - 
.base = DIST_BASE(&weibull_ops), + .base = GENPARETO(my_weibull), .mu = 0, .sigma = dist.param1, .xi = dist.param2, diff --git a/src/lib/math/prob_distr.h b/src/lib/math/prob_distr.h index 981fc2017d..66acb796fd 100644 --- a/src/lib/math/prob_distr.h +++ b/src/lib/math/prob_distr.h @@ -20,6 +20,8 @@ struct dist { }; #define DIST_BASE(OPS) { .ops = (OPS) } +#define DIST_BASE_TYPED(OPS, OBJ, TYPE) \ + DIST_BASE((OPS) + 0*sizeof(&(OBJ) - (const TYPE *)&(OBJ))) const char *dist_name(const struct dist *); double dist_sample(const struct dist *); @@ -46,6 +48,9 @@ struct geometric { extern const struct dist_ops geometric_ops; +#define GEOMETRIC(OBJ) \ + DIST_BASE_TYPED(&geometric_ops, OBJ, struct geometric) + /* Pareto distribution */ struct genpareto { @@ -57,6 +62,9 @@ struct genpareto { extern const struct dist_ops genpareto_ops; +#define GENPARETO(OBJ) \ + DIST_BASE_TYPED(&genpareto_ops, OBJ, struct genpareto) + /* Weibull distribution */ struct weibull { @@ -67,6 +75,9 @@ struct weibull { extern const struct dist_ops weibull_ops; +#define WEIBULL(OBJ) \ + DIST_BASE_TYPED(&weibull_ops, OBJ, struct weibull) + /* Log-logistic distribution */ struct log_logistic { @@ -77,6 +88,9 @@ struct log_logistic { extern const struct dist_ops log_logistic_ops; +#define LOG_LOGISTIC(OBJ) \ + DIST_BASE_TYPED(&log_logistic_ops, OBJ, struct log_logistic) + /* Logistic distribution */ struct logistic { @@ -87,6 +101,9 @@ struct logistic { extern const struct dist_ops logistic_ops; +#define LOGISTIC(OBJ) \ + DIST_BASE_TYPED(&logistic_ops, OBJ, struct logistic) + /* Uniform distribution */ struct uniform { @@ -97,6 +114,9 @@ struct uniform { extern const struct dist_ops uniform_ops; +#define UNIFORM(OBJ) \ + DIST_BASE_TYPED(&uniform_ops, OBJ, struct uniform) + /** Only by unittests */ #ifdef PROB_DISTR_PRIVATE diff --git a/src/test/test_prob_distr.c b/src/test/test_prob_distr.c index fe3969518c..ff23f01033 100644 --- a/src/test/test_prob_distr.c +++ b/src/test/test_prob_distr.c @@ -943,7 
+943,7 @@ static bool test_stochastic_geometric_impl(double p) { const struct geometric geometric = { - .base = DIST_BASE(&geometric_ops), + .base = GEOMETRIC(geometric), .p = p, }; double logP[PSI_DF] = {0}; @@ -1151,32 +1151,32 @@ test_stochastic_uniform(void *arg) (void) arg; const struct uniform uniform01 = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform01), .a = 0, .b = 1, }; const struct uniform uniform_pos = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform_pos), .a = 1.23, .b = 4.56, }; const struct uniform uniform_neg = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform_neg), .a = -10, .b = -1, }; const struct uniform uniform_cross = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform_cross), .a = -1.23, .b = 4.56, }; const struct uniform uniform_subnormal = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform_subnormal), .a = 4e-324, .b = 4e-310, }; const struct uniform uniform_subnormal_cross = { - .base = DIST_BASE(&uniform_ops), + .base = UNIFORM(uniform_subnormal_cross), .a = -4e-324, .b = 4e-310, }; @@ -1202,7 +1202,7 @@ static bool test_stochastic_logistic_impl(double mu, double sigma) { const struct logistic dist = { - .base = DIST_BASE(&logistic_ops), + .base = LOGISTIC(dist), .mu = mu, .sigma = sigma, }; @@ -1215,7 +1215,7 @@ static bool test_stochastic_log_logistic_impl(double alpha, double beta) { const struct log_logistic dist = { - .base = DIST_BASE(&log_logistic_ops), + .base = LOG_LOGISTIC(dist), .alpha = alpha, .beta = beta, }; @@ -1228,7 +1228,7 @@ static bool test_stochastic_weibull_impl(double lambda, double k) { const struct weibull dist = { - .base = DIST_BASE(&weibull_ops), + .base = WEIBULL(dist), .lambda = lambda, .k = k, }; @@ -1248,7 +1248,7 @@ static bool test_stochastic_genpareto_impl(double mu, double sigma, double xi) { const struct genpareto dist = { - .base = DIST_BASE(&genpareto_ops), + .base = GENPARETO(dist), .mu = mu, .sigma = sigma, .xi = xi, -- cgit 
v1.2.3-54-g00ecf From 9728d3f8ac395d7157d30f9b73117b58d704432c Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 10 Jan 2019 18:11:36 +0000 Subject: Fix wrong bases. --- src/core/or/circuitpadding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 4080614338..36fcbb5030 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -551,7 +551,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is Mu, param2 is sigma. */ const struct logistic my_logistic = { - .base = LOGISTIC(my_uniform), + .base = LOGISTIC(my_logistic), .mu = dist.param1, .sigma = dist.param2, }; @@ -590,7 +590,7 @@ circpad_distribution_sample(circpad_distribution_t dist) { /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */ const struct genpareto my_genpareto = { - .base = GENPARETO(my_weibull), + .base = GENPARETO(my_genpareto), .mu = 0, .sigma = dist.param1, .xi = dist.param2, -- cgit v1.2.3-54-g00ecf From 4dc189a9047931599f524a7c7dc5b447f3915409 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Fri, 11 Jan 2019 12:06:14 +0200 Subject: Clarify immutability of global padding machine specs. --- src/core/or/circuit_st.h | 4 +++- src/core/or/circuitpadding.c | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/core/or/circuit_st.h b/src/core/or/circuit_st.h index 47639940d4..29bcaa098f 100644 --- a/src/core/or/circuit_st.h +++ b/src/core/or/circuit_st.h @@ -185,7 +185,9 @@ struct circuit_t { /** Adaptive Padding state machines: these are immutable. The state machines * that come from the consensus are saved to a global structure, to avoid - * per-circuit allocations. This merely points to the global copy. + * per-circuit allocations. This merely points to the global copy in + * origin_padding_machines or relay_padding_machines that should never + * change or get deallocated. 
* * Each element of this array corresponds to a different padding machine, * and we can have up to CIRCPAD_MAX_MACHINES such machines. */ diff --git a/src/core/or/circuitpadding.c b/src/core/or/circuitpadding.c index 36fcbb5030..0dadc52139 100644 --- a/src/core/or/circuitpadding.c +++ b/src/core/or/circuitpadding.c @@ -87,11 +87,19 @@ static uint64_t circpad_global_padding_sent; static uint64_t circpad_global_nonpadding_sent; /** This is the list of circpad_machine_spec_t's parsed from consensus and - * torrc that have origin_side == 1 (ie: are for client side) */ + * torrc that have origin_side == 1 (ie: are for client side). + * + * The machines in this smartlist are considered immutable and they are used + * as-is by circuits, so they should not change or get deallocated while Tor + * is running and for as long as circuits are alive. */ STATIC smartlist_t *origin_padding_machines = NULL; /** This is the list of circpad_machine_spec_t's parsed from consensus and - * torrc that have origin_side == 0 (ie: are for relay side) */ + * torrc that have origin_side == 0 (ie: are for relay side). + * + * The machines in this smartlist are considered immutable and they are used + * as-is by circuits, so they should not change or get deallocated while Tor + * is running and for as long as circuits are alive. */ STATIC smartlist_t *relay_padding_machines = NULL; /** Loop over the current padding state machines using loop_var as the -- cgit v1.2.3-54-g00ecf From b269ab5aaeee65a3a0b1e5e0923d9dc7898c232e Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Mon, 14 Jan 2019 15:07:19 +0200 Subject: Remove TODO file. All remaining tasks are now part of #28632.
--- PADDING_TODO.txt | 58 -------------------------------------------------------- 1 file changed, 58 deletions(-) delete mode 100644 PADDING_TODO.txt diff --git a/PADDING_TODO.txt b/PADDING_TODO.txt deleted file mode 100644 index a2e97f7ca3..0000000000 --- a/PADDING_TODO.txt +++ /dev/null @@ -1,58 +0,0 @@ -TODO sketch for this branch, in rough priority order: - -- Clean up/fix XXX's and FIXMEs - - Test event entry points into circuitpad? - - Most of our events come from completely untested code :/ - -- Compat-breaking changes to be decided/done ASAP - - Option to keep circuits open if machine present - - Specify an ordered preference list of padding machines - - Specify exit policy for machine conditions? - - short_policy_t looks good, except for its flexible array member :/ - - Can we make our own struct with a small, fixed number of policy - entries? Say 3-4? Or is that a bad idea to lose this flexibility? - - Check conditions based on attached streams on the circuit - - Accept should mean "only apply if matched" - - Reject should mean "don't apply if matched" - - If a policy is specified, Reject *:* is implicit default (so reject - policies need an Accept entry). - - With no policy, Accept *:* is implicit default. - - -- Misc fixes: - - Remove circuitsetup machine (but place it in unittests -- they depend on it) - - Circuit RTT measurement will break on var_cell/EXTEND2 cells - - Are there any heuristics we can use here? - - If RELAY_EARLY is only for the first cell of an EXTEND2 series, - we can use that. But the proposal currently says MAY, but not MUST - for this behavior. - -======== 0.3.6 ======== - -- Come up with some good histograms for eg circuit setup fingerprinting, - website fingerprinting, and vanguards usage. - -- Vanguards compatibility for MiddleNodes (via changes to vanguards addon) - -- circpad_machine_validate() function to sanity-check histograms loaded from - consensus/torrc (can also be used to help guide a GA). 
- - Check bin construction - - no type overflow (start_usec + range_sec, etc) - - no conflicting state transitions (or overlap with cancel events) - - no use of both histograms and iat_dist - - at least two histogram bins - - min_hop vs target_hop - -- Support torrc load+serialization of state machines - - ?? - -- Support consensus load+serialization of state machines - - ?? - -- Prop #265 load balancing - -- Rephist timer stats - - Is this a privacy risk? The adversary could create lots of circuits - to find a layer2 vanguard.. Otherwise they will be spread across middles. - - -- cgit v1.2.3-54-g00ecf